34 files changed, 599 insertions, 227 deletions
diff --git a/src/arch/SConscript b/src/arch/SConscript
index 59cea6211..dda1dea53 100644
--- a/src/arch/SConscript
+++ b/src/arch/SConscript
@@ -50,6 +50,7 @@ isa_switch_hdrs = Split('''
 	arguments.hh
 	faults.hh
 	isa_traits.hh
+        locked_mem.hh
 	process.hh
 	regfile.hh
 	stacktrace.hh
diff --git a/src/arch/alpha/locked_mem.hh b/src/arch/alpha/locked_mem.hh
new file mode 100644
index 000000000..368ea2895
--- /dev/null
+++ b/src/arch/alpha/locked_mem.hh
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ */
+
+#ifndef __ARCH_ALPHA_LOCKED_MEM_HH__
+#define __ARCH_ALPHA_LOCKED_MEM_HH__
+
+/**
+ * @file
+ *
+ * ISA-specific helper functions for locked memory accesses.
+ */
+
+#include "arch/isa_traits.hh"
+#include "base/misc.hh"
+#include "mem/request.hh"
+
+
+namespace AlphaISA
+{
+template <class XC>
+inline void
+handleLockedRead(XC *xc, Request *req)
+{
+    xc->setMiscReg(Lock_Addr_DepTag, req->getPaddr() & ~0xf);
+    xc->setMiscReg(Lock_Flag_DepTag, true);
+}
+
+
+template <class XC>
+inline bool
+handleLockedWrite(XC *xc, Request *req)
+{
+    if (req->isUncacheable()) {
+        // Funky Turbolaser mailbox access...don't update
+        // result register (see stq_c in decoder.isa)
+        req->setScResult(2);
+    } else {
+        // standard store conditional
+        bool lock_flag = xc->readMiscReg(Lock_Flag_DepTag);
+        Addr lock_addr = xc->readMiscReg(Lock_Addr_DepTag);
+        if (!lock_flag || (req->getPaddr() & ~0xf) != lock_addr) {
+            // Lock flag not set or addr mismatch in CPU;
+            // don't even bother sending to memory system
+            req->setScResult(0);
+            xc->setMiscReg(Lock_Flag_DepTag, false);
+            // the rest of this code is not architectural;
+            // it's just a debugging aid to help detect
+            // livelock by warning on long sequences of failed
+            // store conditionals
+            int stCondFailures = xc->readStCondFailures();
+            stCondFailures++;
+            xc->setStCondFailures(stCondFailures);
+            if (stCondFailures % 100000 == 0) {
+                warn("cpu %d: %d consecutive "
+                     "store conditional failures\n",
+                     xc->readCpuId(), stCondFailures);
+            }
+
+            // store conditional failed already, so don't issue it to mem
+            return false;
+        }
+    }
+
+    return true;
+}
+
+
+} // namespace AlphaISA
+
+#endif
diff --git a/src/arch/mips/locked_mem.hh b/src/arch/mips/locked_mem.hh
new file mode 100644
index 000000000..363cf1e90
--- /dev/null
+++ b/src/arch/mips/locked_mem.hh
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ */
+
+#ifndef __ARCH_MIPS_LOCKED_MEM_HH__
+#define __ARCH_MIPS_LOCKED_MEM_HH__
+
+/**
+ * @file
+ *
+ * ISA-specific helper functions for locked memory accesses.
+ */
+
+#include "mem/request.hh"
+
+
+namespace MipsISA
+{
+template <class XC>
+inline void
+handleLockedRead(XC *xc, Request *req)
+{
+}
+
+
+template <class XC>
+inline bool
+handleLockedWrite(XC *xc, Request *req)
+{
+    return true;
+}
+
+
+} // namespace MipsISA
+
+#endif
diff --git a/src/arch/sparc/locked_mem.hh b/src/arch/sparc/locked_mem.hh
new file mode 100644
index 000000000..291b2f422
--- /dev/null
+++ b/src/arch/sparc/locked_mem.hh
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ */
+
+#ifndef __ARCH_SPARC_LOCKED_MEM_HH__
+#define __ARCH_SPARC_LOCKED_MEM_HH__
+
+/**
+ * @file
+ *
+ * ISA-specific helper functions for locked memory accesses.
+ */
+
+#include "mem/request.hh"
+
+
+namespace SparcISA
+{
+template <class XC>
+inline void
+handleLockedRead(XC *xc, Request *req)
+{
+}
+
+
+template <class XC>
+inline bool
+handleLockedWrite(XC *xc, Request *req)
+{
+    return true;
+}
+
+
+} // namespace SparcISA
+
+#endif
diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index 8e8153b68..274407be5 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -112,6 +112,7 @@ baseFlags = [
     'IdeDisk',
     'InstExec',
     'Interrupt',
+    'LLSC',
     'LSQ',
     'LSQUnit',
     'Loader',
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index e02527371..75e0d86af 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -140,8 +140,8 @@ class BaseCPU : public MemObject
         bool functionTrace;
         Tick functionTraceStart;
         System *system;
-#if FULL_SYSTEM
         int cpu_id;
+#if FULL_SYSTEM
         Tick profile;
 #endif
         Tick progress_interval;
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index f2109e88d..d6cdff5c5 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -193,7 +193,7 @@ BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags)
     // note this is a local, not BaseDynInst::fault
     Fault trans_fault = cpu->translateDataReadReq(req);
 
-    if (trans_fault == NoFault && !(req->flags & UNCACHEABLE)) {
+    if (trans_fault == NoFault && !(req->isUncacheable())) {
         // It's a valid address to cacheable space.  Record key MemReq
         // parameters so we can generate another one just like it for
         // the timing access without calling translate() again (which
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index 1540a6b94..f6d56eef6 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -175,7 +175,7 @@ CheckerCPU::read(Addr addr, T &data, unsigned flags)
 
     pkt->dataStatic(&data);
 
-    if (!(memReq->getFlags() & UNCACHEABLE)) {
+    if (!(memReq->isUncacheable())) {
         // Access memory to see if we have the same data
         dcachePort->sendFunctional(pkt);
     } else {
@@ -251,9 +251,9 @@ CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
     // This is because the LSQ would have to be snooped in the CPU to
     // verify this data.
     if (unverifiedReq &&
-        !(unverifiedReq->getFlags() & UNCACHEABLE) &&
-        (!(unverifiedReq->getFlags() & LOCKED) ||
-         ((unverifiedReq->getFlags() & LOCKED) &&
+        !(unverifiedReq->isUncacheable()) &&
+        (!(unverifiedReq->isLocked()) ||
+         ((unverifiedReq->isLocked()) &&
           unverifiedReq->getScResult() == 1))) {
         T inst_data;
 /*
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 2d447bfe5..497179576 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -599,7 +599,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
     if (fault == NoFault) {
 #if 0
         if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) ||
-            memReq[tid]->flags & UNCACHEABLE) {
+            memReq[tid]->isUncacheable()) {
             DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
                     "misspeculating path)!",
                     memReq[tid]->paddr);
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 90d1a3d53..58945f04e 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -492,7 +492,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     // A bit of a hackish way to get uncached accesses to work only if they're
     // at the head of the LSQ and are ready to commit (at the head of the ROB
     // too).
-    if (req->getFlags() & UNCACHEABLE &&
+    if (req->isUncacheable() &&
         (load_idx != loadHead || !load_inst->isAtCommit())) {
         iewStage->rescheduleMemInst(load_inst);
         ++lsqRescheduledLoads;
@@ -509,7 +509,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             load_idx, store_idx, storeHead, req->getPaddr());
 
 #if FULL_SYSTEM
-    if (req->getFlags() & LOCKED) {
+    if (req->isLocked()) {
         cpu->lockAddr = req->getPaddr();
         cpu->lockFlag = true;
     }
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 98bea74fb..63ffcece1 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -416,7 +416,7 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
         // realizes there is activity.
         // Mark it as executed unless it is an uncached load that
         // needs to hit the head of commit.
-        if (!(inst->req->getFlags() & UNCACHEABLE) || inst->isAtCommit()) {
+        if (!(inst->req->isUncacheable()) || inst->isAtCommit()) {
             inst->setExecuted();
         }
         iewStage->instToCommit(inst);
@@ -613,8 +613,8 @@ LSQUnit<Impl>::writebackStores()
                 storeQueue[storeWBIdx].inst->seqNum);
 
         // @todo: Remove this SC hack once the memory system handles it.
-        if (req->getFlags() & LOCKED) {
-            if (req->getFlags() & UNCACHEABLE) {
+        if (req->isLocked()) {
+            if (req->isUncacheable()) {
                 req->setScResult(2);
             } else {
                 if (cpu->lockFlag) {
diff --git a/src/cpu/ozone/back_end.hh b/src/cpu/ozone/back_end.hh
index 9bab6a964..8debd277d 100644
--- a/src/cpu/ozone/back_end.hh
+++ b/src/cpu/ozone/back_end.hh
@@ -493,7 +493,7 @@ BackEnd<Impl>::read(RequestPtr req, T &data, int load_idx)
     }
 */
 /*
-    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+    if (!dcacheInterface && (memReq->isUncacheable()))
         recordEvent("Uncached Read");
 */
     return LSQ.read(req, data, load_idx);
@@ -534,7 +534,7 @@ BackEnd<Impl>::write(RequestPtr req, T &data, int store_idx)
         *res = memReq->result;
         */
 /*
-    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+    if (!dcacheInterface && (memReq->isUncacheable()))
         recordEvent("Uncached Write");
 */
     return LSQ.write(req, data, store_idx);
diff --git a/src/cpu/ozone/back_end_impl.hh b/src/cpu/ozone/back_end_impl.hh
index ac3218c02..4078699fe 100644
--- a/src/cpu/ozone/back_end_impl.hh
+++ b/src/cpu/ozone/back_end_impl.hh
@@ -1256,7 +1256,7 @@ BackEnd<Impl>::executeInsts()
 
 //                ++iewExecStoreInsts;
 
-                if (!(inst->req->flags & LOCKED)) {
+                if (!(inst->req->isLocked())) {
                     inst->setExecuted();
 
                     instToCommit(inst);
diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh
index 8c5be9424..70ec1d101 100644
--- a/src/cpu/ozone/cpu.hh
+++ b/src/cpu/ozone/cpu.hh
@@ -455,12 +455,12 @@ class OzoneCPU : public BaseCPU
     {
 #if 0
 #if FULL_SYSTEM && defined(TARGET_ALPHA)
-        if (req->flags & LOCKED) {
+        if (req->isLocked()) {
             req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
             req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
         }
 #endif
-        if (req->flags & LOCKED) {
+        if (req->isLocked()) {
             lockAddrList.insert(req->paddr);
             lockFlag = true;
         }
@@ -489,10 +489,10 @@ class OzoneCPU : public BaseCPU
         ExecContext *xc;
 
         // If this is a store conditional, act appropriately
-        if (req->flags & LOCKED) {
+        if (req->isLocked()) {
             xc = req->xc;
 
-            if (req->flags & UNCACHEABLE) {
+            if (req->isUncacheable()) {
                 // Don't update result register (see stq_c in isa_desc)
                 req->result = 2;
                 xc->setStCondFailures(0);//Needed? [RGD]
@@ -532,8 +532,8 @@ class OzoneCPU : public BaseCPU
 
 #endif
 
-        if (req->flags & LOCKED) {
-            if (req->flags & UNCACHEABLE) {
+        if (req->isLocked()) {
+            if (req->isUncacheable()) {
                 req->result = 2;
             } else {
                 if (this->lockFlag) {
diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh
index d34716de6..5956c5cba 100644
--- a/src/cpu/ozone/front_end_impl.hh
+++ b/src/cpu/ozone/front_end_impl.hh
@@ -493,7 +493,7 @@ FrontEnd<Impl>::fetchCacheLine()
     if (fault == NoFault) {
 #if 0
         if (cpu->system->memctrl->badaddr(memReq->paddr) ||
-            memReq->flags & UNCACHEABLE) {
+            memReq->isUncacheable()) {
             DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
                     "misspeculating path!",
                     memReq->paddr);
diff --git a/src/cpu/ozone/inorder_back_end.hh b/src/cpu/ozone/inorder_back_end.hh
index ffdba2f6c..76eef6fad 100644
--- a/src/cpu/ozone/inorder_back_end.hh
+++ b/src/cpu/ozone/inorder_back_end.hh
@@ -231,7 +231,7 @@ InorderBackEnd<Impl>::read(Addr addr, T &data, unsigned flags)
         }
     }
 /*
-    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+    if (!dcacheInterface && (memReq->isUncacheable()))
         recordEvent("Uncached Read");
 */
     return fault;
@@ -243,7 +243,7 @@ Fault
 InorderBackEnd<Impl>::read(MemReqPtr &req, T &data)
 {
 #if FULL_SYSTEM && defined(TARGET_ALPHA)
-    if (req->flags & LOCKED) {
+    if (req->isLocked()) {
         req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
         req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
     }
@@ -291,7 +291,7 @@ InorderBackEnd<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
     if (res && (fault == NoFault))
         *res = memReq->result;
 /*
-    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+    if (!dcacheInterface && (memReq->isUncacheable()))
         recordEvent("Uncached Write");
 */
     return fault;
@@ -306,10 +306,10 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data)
     ExecContext *xc;
 
     // If this is a store conditional, act appropriately
-    if (req->flags & LOCKED) {
+    if (req->isLocked()) {
         xc = req->xc;
 
-        if (req->flags & UNCACHEABLE) {
+        if (req->isUncacheable()) {
             // Don't update result register (see stq_c in isa_desc)
             req->result = 2;
             xc->setStCondFailures(0);//Needed? [RGD]
@@ -391,7 +391,7 @@ InorderBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
     }
 
 /*
-    if (!dcacheInterface && (req->flags & UNCACHEABLE))
+    if (!dcacheInterface && (req->isUncacheable()))
         recordEvent("Uncached Read");
 */
     return NoFault;
@@ -455,8 +455,8 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
         }
     }
 /*
-    if (req->flags & LOCKED) {
-        if (req->flags & UNCACHEABLE) {
+    if (req->isLocked()) {
+        if (req->isUncacheable()) {
             // Don't update result register (see stq_c in isa_desc)
             req->result = 2;
         } else {
@@ -469,7 +469,7 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
         *res = req->result;
         */
 /*
-    if (!dcacheInterface && (req->flags & UNCACHEABLE))
+    if (!dcacheInterface && (req->isUncacheable()))
         recordEvent("Uncached Write");
 */
     return NoFault;
diff --git a/src/cpu/ozone/lsq_unit.hh b/src/cpu/ozone/lsq_unit.hh
index 38c1c09a2..056c79521 100644
--- a/src/cpu/ozone/lsq_unit.hh
+++ b/src/cpu/ozone/lsq_unit.hh
@@ -426,7 +426,7 @@ OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
     // at the head of the LSQ and are ready to commit (at the head of the ROB
     // too).
     // @todo: Fix uncached accesses.
-    if (req->flags & UNCACHEABLE &&
+    if (req->isUncacheable() &&
         (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) {
 
         return TheISA::genMachineCheckFault();
diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh
index ee0804036..c46eb90be 100644
--- a/src/cpu/ozone/lsq_unit_impl.hh
+++ b/src/cpu/ozone/lsq_unit_impl.hh
@@ -577,7 +577,7 @@ OzoneLSQ<Impl>::writebackStores()
             MemAccessResult result = dcacheInterface->access(req);
 
             //@todo temp fix for LL/SC (works fine for 1 CPU)
-            if (req->flags & LOCKED) {
+            if (req->isLocked()) {
                 req->result=1;
                 panic("LL/SC! oh no no support!!!");
             }
@@ -596,7 +596,7 @@ OzoneLSQ<Impl>::writebackStores()
                 Event *wb = NULL;
 /*
                 typename IEW::LdWritebackEvent *wb = NULL;
-                if (req->flags & LOCKED) {
+                if (req->isLocked()) {
                     // Stx_C does not generate a system port transaction.
                     req->result=0;
                     wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
@@ -630,7 +630,7 @@ OzoneLSQ<Impl>::writebackStores()
 //                DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
 //                        storeQueue[storeWBIdx].inst->seqNum);
 
-                if (req->flags & LOCKED) {
+                if (req->isLocked()) {
                     // Stx_C does not generate a system port transaction.
                     req->result=1;
                     typename BackEnd::LdWritebackEvent *wb =
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index 6640a9f34..347f4569b 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -507,7 +507,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
     // at the head of the LSQ and are ready to commit (at the head of the ROB
     // too).
     // @todo: Fix uncached accesses.
-    if (req->getFlags() & UNCACHEABLE &&
+    if (req->isUncacheable() &&
         (inst != loadQueue.back() || !inst->isAtCommit())) {
         DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
                 "commit/LSQ!\n",
@@ -659,7 +659,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
         return NoFault;
     }
 
-    if (req->getFlags() & LOCKED) {
+    if (req->isLocked()) {
         cpu->lockFlag = true;
     }
 
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index 4c96ad149..9d17b027f 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -394,7 +394,7 @@ OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
     // Actually probably want the oldest faulting load
     if (load_fault != NoFault) {
         DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
-        if (!(inst->req->getFlags() & UNCACHEABLE && !inst->isAtCommit())) {
+        if (!(inst->req->isUncacheable() && !inst->isAtCommit())) {
             inst->setExecuted();
         }
         // Maybe just set it as can commit here, although that might cause
@@ -605,8 +605,8 @@ OzoneLWLSQ<Impl>::writebackStores()
                 inst->seqNum);
 
         // @todo: Remove this SC hack once the memory system handles it.
-        if (req->getFlags() & LOCKED) {
-            if (req->getFlags() & UNCACHEABLE) {
+        if (req->isLocked()) {
+            if (req->isUncacheable()) {
                 req->setScResult(2);
             } else {
                 if (cpu->lockFlag) {
@@ -663,7 +663,7 @@ OzoneLWLSQ<Impl>::writebackStores()
             if (result != MA_HIT && dcacheInterface->doEvents()) {
                 store_event->miss = true;
                 typename BackEnd::LdWritebackEvent *wb = NULL;
-                if (req->flags & LOCKED) {
+                if (req->isLocked()) {
                     wb = new typename BackEnd::LdWritebackEvent(inst,
                                                             be);
                     store_event->wbEvent = wb;
@@ -690,7 +690,7 @@ OzoneLWLSQ<Impl>::writebackStores()
 //                DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
 //                        inst->seqNum);
 
-                if (req->flags & LOCKED) {
+                if (req->isLocked()) {
                     // Stx_C does not generate a system port
                     // transaction in the 21264, but that might be
                     // hard to accomplish in this model.
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 88698bfee..42b0e9783 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -28,6 +28,7 @@
  * Authors: Steve Reinhardt
  */
 
+#include "arch/locked_mem.hh"
 #include "arch/utility.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/simple/atomic.hh"
@@ -133,20 +134,19 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
 {
     _status = Idle;
 
-    // @todo fix me and get the real cpu id & thread number!!!
     ifetch_req = new Request();
-    ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
+    ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT
     ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast);
     ifetch_pkt->dataStatic(&inst);
 
     data_read_req = new Request();
-    data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
+    data_read_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too
     data_read_pkt = new Packet(data_read_req, Packet::ReadReq,
                                Packet::Broadcast);
     data_read_pkt->dataStatic(&dataReg);
 
     data_write_req = new Request();
-    data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
+    data_write_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too
     data_write_pkt = new Packet(data_write_req, Packet::WriteReq,
                                 Packet::Broadcast);
 }
@@ -253,29 +253,36 @@ template <class T>
 Fault
 AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
 {
-    data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
+    // use the CPU's statically allocated read request and packet objects
+    Request *req = data_read_req;
+    Packet  *pkt = data_read_pkt;
+
+    req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
 
     if (traceData) {
         traceData->setAddr(addr);
     }
 
     // translate to physical address
-    Fault fault = thread->translateDataReadReq(data_read_req);
+    Fault fault = thread->translateDataReadReq(req);
 
     // Now do the access.
     if (fault == NoFault) {
-        data_read_pkt->reinitFromRequest();
+        pkt->reinitFromRequest();
 
-        dcache_latency = dcachePort.sendAtomic(data_read_pkt);
+        dcache_latency = dcachePort.sendAtomic(pkt);
         dcache_access = true;
 
-        assert(data_read_pkt->result == Packet::Success);
-        data = data_read_pkt->get<T>();
+        assert(pkt->result == Packet::Success);
+        data = pkt->get<T>();
 
+        if (req->isLocked()) {
+            TheISA::handleLockedRead(thread, req);
+        }
     }
 
     // This will need a new way to tell if it has a dcache attached.
-    if (data_read_req->getFlags() & UNCACHEABLE)
+    if (req->isUncacheable())
         recordEvent("Uncached Read");
 
     return fault;
@@ -328,33 +335,52 @@ template <class T>
 Fault
 AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 {
-    data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
+    // use the CPU's statically allocated write request and packet objects
+    Request *req = data_write_req;
+    Packet  *pkt = data_write_pkt;
+
+    req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
 
     if (traceData) {
         traceData->setAddr(addr);
     }
 
     // translate to physical address
-    Fault fault = thread->translateDataWriteReq(data_write_req);
+    Fault fault = thread->translateDataWriteReq(req);
 
     // Now do the access.
     if (fault == NoFault) {
-        data = htog(data);
-        data_write_pkt->reinitFromRequest();
-        data_write_pkt->dataStatic(&data);
+        bool do_access = true;  // flag to suppress cache access
 
-        dcache_latency = dcachePort.sendAtomic(data_write_pkt);
-        dcache_access = true;
+        if (req->isLocked()) {
+            do_access = TheISA::handleLockedWrite(thread, req);
+        }
+
+        if (do_access) {
+            data = htog(data);
+            pkt->reinitFromRequest();
+            pkt->dataStatic(&data);
 
-        assert(data_write_pkt->result == Packet::Success);
+            dcache_latency = dcachePort.sendAtomic(pkt);
+            dcache_access = true;
+
+            assert(pkt->result == Packet::Success);
+        }
 
-        if (res && data_write_req->getFlags() & LOCKED) {
-            *res = data_write_req->getScResult();
+        if (req->isLocked()) {
+            uint64_t scResult = req->getScResult();
+            if (scResult != 0) {
+                // clear failure counter
+                thread->setStCondFailures(0);
+            }
+            if (res) {
+                *res = req->getScResult();
+            }
         }
     }
 
     // This will need a new way to tell if it's hooked up to a cache or not.
-    if (data_write_req->getFlags() & UNCACHEABLE)
+    if (req->isUncacheable())
         recordEvent("Uncached Write");
 
     // If the write needs to have a fault on the access, consider calling
@@ -467,11 +493,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
     Param<Tick> progress_interval;
     SimObjectParam<MemObject *> mem;
     SimObjectParam<System *> system;
+    Param<int> cpu_id;
 
 #if FULL_SYSTEM
     SimObjectParam<AlphaITB *> itb;
     SimObjectParam<AlphaDTB *> dtb;
-    Param<int> cpu_id;
     Param<Tick> profile;
 #else
     SimObjectParam<Process *> workload;
@@ -500,11 +526,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
     INIT_PARAM(progress_interval, "Progress interval"),
     INIT_PARAM(mem, "memory"),
     INIT_PARAM(system, "system object"),
+    INIT_PARAM(cpu_id, "processor ID"),
 
 #if FULL_SYSTEM
     INIT_PARAM(itb, "Instruction TLB"),
     INIT_PARAM(dtb, "Data TLB"),
-    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(profile, ""),
 #else
     INIT_PARAM(workload, "processes to run"),
@@ -538,11 +564,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU)
     params->simulate_stalls = simulate_stalls;
     params->mem = mem;
     params->system = system;
+    params->cpu_id = cpu_id;
 
 #if FULL_SYSTEM
     params->itb = itb;
     params->dtb = dtb;
-    params->cpu_id = cpu_id;
     params->profile = profile;
 #else
     params->process = workload;
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 03ee27e04..a394468b9 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -28,6 +28,7 @@
  * Authors: Steve Reinhardt
  */
 
+#include "arch/locked_mem.hh"
 #include "arch/utility.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/simple/timing.hh"
@@ -94,7 +95,8 @@ TimingSimpleCPU::CpuPort::TickEvent::schedule(Packet *_pkt, Tick t)
 }
 
 TimingSimpleCPU::TimingSimpleCPU(Params *p)
-    : BaseSimpleCPU(p), icachePort(this, p->clock), dcachePort(this, p->clock)
+    : BaseSimpleCPU(p), icachePort(this, p->clock), dcachePort(this, p->clock),
+      cpu_id(p->cpu_id)
 {
     _status = Idle;
     ifetch_pkt = dcache_pkt = NULL;
@@ -227,35 +229,35 @@ template <class T>
 Fault
 TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
 {
-    // need to fill in CPU & thread IDs here
-    Request *data_read_req = new Request();
-    data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
-    data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
+    Request *req =
+        new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
+                    cpu_id, /* thread ID */ 0);
 
     if (traceData) {
-        traceData->setAddr(data_read_req->getVaddr());
+        traceData->setAddr(req->getVaddr());
     }
 
    // translate to physical address
-    Fault fault = thread->translateDataReadReq(data_read_req);
+    Fault fault = thread->translateDataReadReq(req);
 
     // Now do the access.
     if (fault == NoFault) {
-        Packet *data_read_pkt =
-            new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast);
-        data_read_pkt->dataDynamic<T>(new T);
+        Packet *pkt =
+            new Packet(req, Packet::ReadReq, Packet::Broadcast);
+        pkt->dataDynamic<T>(new T);
 
-        if (!dcachePort.sendTiming(data_read_pkt)) {
+        if (!dcachePort.sendTiming(pkt)) {
             _status = DcacheRetry;
-            dcache_pkt = data_read_pkt;
+            dcache_pkt = pkt;
         } else {
             _status = DcacheWaitResponse;
+            // memory system takes ownership of packet
             dcache_pkt = NULL;
         }
     }
 
     // This will need a new way to tell if it has a dcache attached.
-    if (data_read_req->getFlags() & UNCACHEABLE)
+    if (req->isUncacheable())
         recordEvent("Uncached Read");
 
     return fault;
@@ -308,31 +310,39 @@ template <class T>
 Fault
 TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 {
-    // need to fill in CPU & thread IDs here
-    Request *data_write_req = new Request();
-    data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
-    data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
+    Request *req =
+        new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
+                    cpu_id, /* thread ID */ 0);
 
     // translate to physical address
-    Fault fault = thread->translateDataWriteReq(data_write_req);
+    Fault fault = thread->translateDataWriteReq(req);
+
     // Now do the access.
     if (fault == NoFault) {
-        Packet *data_write_pkt =
-            new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast);
-        data_write_pkt->allocate();
-        data_write_pkt->set(data);
+        assert(dcache_pkt == NULL);
+        dcache_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+        dcache_pkt->allocate();
+        dcache_pkt->set(data);
 
-        if (!dcachePort.sendTiming(data_write_pkt)) {
-            _status = DcacheRetry;
-            dcache_pkt = data_write_pkt;
-        } else {
-            _status = DcacheWaitResponse;
-            dcache_pkt = NULL;
+        bool do_access = true;  // flag to suppress cache access
+
+        if (req->isLocked()) {
+            do_access = TheISA::handleLockedWrite(thread, req);
+        }
+
+        if (do_access) {
+            if (!dcachePort.sendTiming(dcache_pkt)) {
+                _status = DcacheRetry;
+            } else {
+                _status = DcacheWaitResponse;
+                // memory system takes ownership of packet
+                dcache_pkt = NULL;
+            }
         }
     }
 
     // This will need a new way to tell if it's hooked up to a cache or not.
-    if (data_write_req->getFlags() & UNCACHEABLE)
+    if (req->isUncacheable())
         recordEvent("Uncached Write");
 
     // If the write needs to have a fault on the access, consider calling
@@ -392,9 +402,8 @@ TimingSimpleCPU::fetch()
 {
     checkForInterrupts();
 
-    // need to fill in CPU & thread IDs here
     Request *ifetch_req = new Request();
-    ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
+    ifetch_req->setThreadContext(cpu_id, /* thread ID */ 0);
     Fault fault = setupFetchRequest(ifetch_req);
 
     ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast);
@@ -453,12 +462,20 @@ TimingSimpleCPU::completeIfetch(Packet *pkt)
     if (curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) {
         // load or store: just send to dcache
         Fault fault = curStaticInst->initiateAcc(this, traceData);
-        if (fault == NoFault) {
-            // successfully initiated access: instruction will
-            // complete in dcache response callback
-            assert(_status == DcacheWaitResponse);
+        if (_status != Running) {
+            // instruction will complete in dcache response callback
+            assert(_status == DcacheWaitResponse || _status == DcacheRetry);
+            assert(fault == NoFault);
         } else {
-            // fault: complete now to invoke fault handler
+            if (fault == NoFault) {
+                // early fail on store conditional: complete now
+                assert(dcache_pkt != NULL);
+                fault = curStaticInst->completeAcc(dcache_pkt, this,
+                                                   traceData);
+                delete dcache_pkt->req;
+                delete dcache_pkt;
+                dcache_pkt = NULL;
+            }
             postExecute();
             advanceInst(fault);
         }
@@ -479,8 +496,7 @@ TimingSimpleCPU::IcachePort::ITickEvent::process()
 bool
 TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt)
 {
-    // These next few lines could be replaced with something faster
-    // who knows what though
+    // delay processing of returned data until next CPU clock edge
     Tick time = pkt->req->getTime();
     while (time < curTick)
         time += lat;
@@ -527,6 +543,10 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt)
 
     Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
 
+    if (pkt->isRead() && pkt->req->isLocked()) {
+        TheISA::handleLockedRead(thread, pkt->req);
+    }
+
     delete pkt->req;
     delete pkt;
 
@@ -546,6 +566,7 @@ TimingSimpleCPU::completeDrain()
 bool
 TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt)
 {
+    // delay processing of returned data until next CPU clock edge
     Tick time = pkt->req->getTime();
     while (time < curTick)
         time += lat;
@@ -574,6 +595,7 @@ TimingSimpleCPU::DcachePort::recvRetry()
     Packet *tmp = cpu->dcache_pkt;
     if (sendTiming(tmp)) {
         cpu->_status = DcacheWaitResponse;
+        // memory system takes ownership of packet
         cpu->dcache_pkt = NULL;
     }
 }
@@ -592,11 +614,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU)
     Param<Tick> progress_interval;
     SimObjectParam<MemObject *> mem;
     SimObjectParam<System *> system;
+    Param<int> cpu_id;
 
 #if FULL_SYSTEM
     SimObjectParam<AlphaITB *> itb;
     SimObjectParam<AlphaDTB *> dtb;
-    Param<int> cpu_id;
     Param<Tick> profile;
 #else
     SimObjectParam<Process *> workload;
@@ -625,11 +647,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU)
     INIT_PARAM(progress_interval, "Progress interval"),
     INIT_PARAM(mem, "memory"),
     INIT_PARAM(system, "system object"),
+    INIT_PARAM(cpu_id, "processor ID"),
 
 #if FULL_SYSTEM
     INIT_PARAM(itb, "Instruction TLB"),
     INIT_PARAM(dtb, "Data TLB"),
-    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(profile, ""),
 #else
     INIT_PARAM(workload, "processes to run"),
@@ -661,11 +683,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU)
     params->functionTraceStart = function_trace_start;
     params->mem = mem;
     params->system = system;
+    params->cpu_id = cpu_id;
 
 #if FULL_SYSTEM
     params->itb = itb;
     params->dtb = dtb;
-    params->cpu_id = cpu_id;
     params->profile = profile;
 #else
     params->process = workload;
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index d03fa4bc0..b65eebd99 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -166,6 +166,8 @@ class TimingSimpleCPU : public BaseSimpleCPU
     Packet *ifetch_pkt;
     Packet *dcache_pkt;
 
+    int cpu_id;
+
   public:
 
     virtual Port *getPort(const std::string &if_name, int idx = -1);
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index 242cfd0e1..6fa6500bd 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -237,7 +237,7 @@ class SimpleThread : public ThreadState
     Fault read(RequestPtr &req, T &data)
     {
 #if FULL_SYSTEM && THE_ISA == ALPHA_ISA
-        if (req->flags & LOCKED) {
+        if (req->isLocked()) {
             req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
             req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
         }
@@ -256,10 +256,10 @@ class SimpleThread : public ThreadState
         ExecContext *xc;
 
         // If this is a store conditional, act appropriately
-        if (req->flags & LOCKED) {
+        if (req->isLocked()) {
             xc = req->xc;
 
-            if (req->flags & UNCACHEABLE) {
+            if (req->isUncacheable()) {
                 // Don't update result register (see stq_c in isa_desc)
                 req->result = 2;
                 xc->setStCondFailures(0);//Needed? [RGD]
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 4cd4dd71a..df85ee0d9 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -118,44 +118,32 @@ Bus::recvTiming(Packet *pkt)
     DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
 
-    Port *pktPort = interfaces[pkt->getSrc()];
-
-    // If the bus is busy, or other devices are in line ahead of the current one,
-    // put this device on the retry list.
-    if (tickNextIdle > curTick || (retryList.size() && pktPort != retryingPort)) {
-        addToRetryList(pktPort);
-        return false;
-    }
-
-    // If the bus is blocked, make the device wait.
-    if (!(port = findDestPort(pkt, pkt->getSrc()))) {
-        addToRetryList(pktPort);
-        return false;
-    }
-
-    // The packet will be sent. Figure out how long it occupies the bus.
-    int numCycles = 0;
-    // Requests need one cycle to send an address
-    if (pkt->isRequest())
-        numCycles++;
-    else if (pkt->isResponse() || pkt->hasData()) {
-        // If a packet has data, it needs ceil(size/width) cycles to send it
-        // We're using the "adding instead of dividing" trick again here
-        if (pkt->hasData()) {
-            int dataSize = pkt->getSize();
-            for (int transmitted = 0; transmitted < dataSize;
-                    transmitted += width) {
-                numCycles++;
+    short dest = pkt->getDest();
+    if (dest == Packet::Broadcast) {
+        if ( timingSnoopPhase1(pkt) )
+        if (timingSnoop(pkt))
+        {
+            timingSnoopPhase2(pkt);
+            pkt->flags |= SNOOP_COMMIT;
+            bool success = timingSnoop(pkt);
+            assert(success);
+            if (pkt->flags & SATISFIED) {
+                //Cache-Cache transfer occuring
+                return true;
             }
-        } else {
-            // If the packet didn't have data, it must have been a response.
-            // Those use the bus for one cycle to send their data.
-            numCycles++;
+            port = findPort(pkt->getAddr(), pkt->getSrc());
         }
+        else
+        {
+            //Snoop didn't succeed
+            retryList.push_back(interfaces[pkt->getSrc()]);
+            return false;
+        }
+    } else {
+        assert(dest >= 0 && dest < interfaces.size());
+        assert(dest != pkt->getSrc()); // catch infinite loops
+        port = interfaces[dest];
     }
-
-    occupyBus(numCycles);
-
     if (port->sendTiming(pkt))  {
         // Packet was successfully sent. Return true.
         // Also take care of retries
@@ -278,43 +266,21 @@ Bus::atomicSnoop(Packet *pkt)
 }
 
 bool
-Bus::timingSnoopPhase1(Packet *pkt)
+Bus::timingSnoop(Packet *pkt)
 {
     std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc());
     bool success = true;
 
     while (!ports.empty() && success)
     {
-        snoopCallbacks.push_back(ports.back());
         success = interfaces[ports.back()]->sendTiming(pkt);
         ports.pop_back();
     }
-    if (!success)
-    {
-        while (!snoopCallbacks.empty())
-        {
-            interfaces[snoopCallbacks.back()]->sendStatusChange(Port::SnoopSquash);
-            snoopCallbacks.pop_back();
-        }
-        return false;
-    }
-    return true;
-}
 
-void
-Bus::timingSnoopPhase2(Packet *pkt)
-{
-    bool success;
-    pkt->flags |= SNOOP_COMMIT;
-    while (!snoopCallbacks.empty())
-    {
-        success = interfaces[snoopCallbacks.back()]->sendTiming(pkt);
-        //We should not fail on snoop callbacks
-        assert(success);
-        snoopCallbacks.pop_back();
-    }
+    return success;
 }
 
+
 /** Function called by the port when the bus is receiving a Atomic
  * transaction.*/
 Tick
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index f238f134d..f8a006911 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -69,9 +69,6 @@ class Bus : public MemObject
     AddrRangeList defaultRange;
     std::vector<DevMap> portSnoopList;
 
-    std::vector<int> snoopCallbacks;
-
-
     /** Function called by the port when the bus is recieving a Timing
       transaction.*/
     bool recvTiming(Packet *pkt);
@@ -121,16 +118,11 @@ class Bus : public MemObject
     /** Snoop all relevant ports atomicly. */
     void atomicSnoop(Packet *pkt);
 
-    /** Snoop for NACK and Blocked in phase 1
+    /** Call snoop on caches, be sure to set SNOOP_COMMIT bit if you want
+     * the snoop to happen
      * @return True if succeds.
      */
-    bool timingSnoopPhase1(Packet *pkt);
-
-    /** @todo Don't need to commit all snoops just those that need it
-     *(register somehow). */
-    /** Commit all snoops now that we know if any of them would have blocked.
-     */
-    void timingSnoopPhase2(Packet *pkt);
+    bool timingSnoop(Packet *pkt);
 
     /** Process address range request.
      * @param resp addresses that we can respond to
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index c69fb7fd5..2e92e7730 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -165,10 +165,6 @@ class BaseCache : public MemObject
                 memSidePort->sendStatusChange(Port::RangeChange);
             }
         }
-        else if (status == Port::SnoopSquash) {
-            assert(snoopPhase2);
-            snoopPhase2 = false;
-        }
     }
 
     virtual Packet *getPacket()
@@ -215,9 +211,6 @@ class BaseCache : public MemObject
     bool topLevelCache;
 
 
-    /** True if we are now in phase 2 of the snoop process. */
-    bool snoopPhase2;
-
     /** Stores time the cache blocked for statistics. */
     Tick blockedCycle;
 
@@ -523,8 +516,10 @@ class BaseCache : public MemObject
      */
     void respond(Packet *pkt, Tick time)
     {
-        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-        reqCpu->schedule(time);
+        if (pkt->needsResponse()) {
+            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+            reqCpu->schedule(time);
+        }
     }
 
     /**
@@ -537,8 +532,10 @@ class BaseCache : public MemObject
         if (!pkt->req->isUncacheable()) {
             missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += time - pkt->time;
         }
-        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-        reqCpu->schedule(time);
+        if (pkt->needsResponse()) {
+            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+            reqCpu->schedule(time);
+        }
     }
 
     /**
@@ -549,6 +546,7 @@ class BaseCache : public MemObject
     {
 //        assert("Implement\n" && 0);
 //	mi->respond(pkt,curTick + hitLatency);
+        assert (pkt->needsResponse());
         CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
         reqMem->schedule(time);
     }
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 46f4b0ebe..1f03065b6 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -60,7 +60,7 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
 {
     if (isCpuSide)
     {
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             pkt->req->setScResult(1);
         }
         if (!(pkt->flags & SATISFIED)) {
@@ -72,16 +72,9 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
         if (pkt->isResponse())
             handleResponse(pkt);
         else {
-            //Check if we are in phase1
-            if (!snoopPhase2) {
-                snoopPhase2 = true;
-            }
-            else {
-                //Check if we should do the snoop
-                if (pkt->flags && SNOOP_COMMIT)
-                    snoop(pkt);
-                snoopPhase2 = false;
-            }
+            //Check if we should do the snoop
+            if (pkt->flags && SNOOP_COMMIT)
+                snoop(pkt);
         }
     }
     return true;
@@ -95,7 +88,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
     if (isCpuSide)
     {
         //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             pkt->req->setScResult(1);
         }
 
@@ -125,7 +118,7 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide)
         pkt->req->setThreadContext(0,0);
 
         //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             assert("Can't handle LL/SC on functional path\n");
         }
 
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index 8fea733ec..070693442 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -110,28 +110,112 @@ PhysicalMemory::calculateLatency(Packet *pkt)
     return lat;
 }
 
+
+
+// Add load-locked to tracking list.  Should only be called if the
+// operation is a load and the LOCKED flag is set.
+void
+PhysicalMemory::trackLoadLocked(Request *req)
+{
+    Addr paddr = LockedAddr::mask(req->getPaddr());
+
+    // first we check if we already have a locked addr for this
+    // xc.  Since each xc only gets one, we just update the
+    // existing record with the new address.
+    list<LockedAddr>::iterator i;
+
+    for (i = lockedAddrList.begin(); i != lockedAddrList.end(); ++i) {
+        if (i->matchesContext(req)) {
+            DPRINTF(LLSC, "Modifying lock record: cpu %d thread %d addr %#x\n",
+                    req->getCpuNum(), req->getThreadNum(), paddr);
+            i->addr = paddr;
+            return;
+        }
+    }
+
+    // no record for this xc: need to allocate a new one
+    DPRINTF(LLSC, "Adding lock record: cpu %d thread %d addr %#x\n",
+            req->getCpuNum(), req->getThreadNum(), paddr);
+    lockedAddrList.push_front(LockedAddr(req));
+}
+
+
+// Called on *writes* only... both regular stores and
+// store-conditional operations.  Check for conventional stores which
+// conflict with locked addresses, and for success/failure of store
+// conditionals.
+bool
+PhysicalMemory::checkLockedAddrList(Request *req)
+{
+    Addr paddr = LockedAddr::mask(req->getPaddr());
+    bool isLocked = req->isLocked();
+
+    // Initialize return value.  Non-conditional stores always
+    // succeed.  Assume conditional stores will fail until proven
+    // otherwise.
+    bool success = !isLocked;
+
+    // Iterate over list.  Note that there could be multiple matching
+    // records, as more than one context could have done a load locked
+    // to this location.
+    list<LockedAddr>::iterator i = lockedAddrList.begin();
+
+    while (i != lockedAddrList.end()) {
+
+        if (i->addr == paddr) {
+            // we have a matching address
+
+            if (isLocked && i->matchesContext(req)) {
+                // it's a store conditional, and as far as the memory
+                // system can tell, the requesting context's lock is
+                // still valid.
+                DPRINTF(LLSC, "StCond success: cpu %d thread %d addr %#x\n",
+                        req->getCpuNum(), req->getThreadNum(), paddr);
+                success = true;
+            }
+
+            // Get rid of our record of this lock and advance to next
+            DPRINTF(LLSC, "Erasing lock record: cpu %d thread %d addr %#x\n",
+                    i->cpuNum, i->threadNum, paddr);
+            i = lockedAddrList.erase(i);
+        }
+        else {
+            // no match: advance to next record
+            ++i;
+        }
+    }
+
+    if (isLocked) {
+        req->setScResult(success ? 1 : 0);
+    }
+
+    return success;
+}
+
 void
 PhysicalMemory::doFunctionalAccess(Packet *pkt)
 {
     assert(pkt->getAddr() + pkt->getSize() < params()->addrRange.size());
 
-    switch (pkt->cmd) {
-      case Packet::ReadReq:
+    if (pkt->isRead()) {
+        if (pkt->req->isLocked()) {
+            trackLoadLocked(pkt->req);
+        }
         memcpy(pkt->getPtr<uint8_t>(),
                pmemAddr + pkt->getAddr() - params()->addrRange.start,
                pkt->getSize());
-        break;
-      case Packet::WriteReq:
-        memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start,
-               pkt->getPtr<uint8_t>(),
-               pkt->getSize());
-        // temporary hack: will need to add real LL/SC implementation
-        // for cacheless systems later.
-        if (pkt->req->getFlags() & LOCKED) {
-            pkt->req->setScResult(1);
+    }
+    else if (pkt->isWrite()) {
+        if (writeOK(pkt->req)) {
+            memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start,
+                   pkt->getPtr<uint8_t>(), pkt->getSize());
         }
-        break;
-      default:
+    }
+    else if (pkt->isInvalidate()) {
+        //upgrade or invalidate
+        pkt->flags |= SATISFIED;
+    }
+    else {
         panic("unimplemented");
     }
 
diff --git a/src/mem/physical.hh b/src/mem/physical.hh
index 02308b2ef..97bea2ec4 100644
--- a/src/mem/physical.hh
+++ b/src/mem/physical.hh
@@ -78,6 +78,68 @@ class PhysicalMemory : public MemObject
     const PhysicalMemory &operator=(const PhysicalMemory &specmem);
 
   protected:
+
+    class LockedAddr {
+      public:
+        // on alpha, minimum LL/SC granularity is 16 bytes, so lower
+        // bits need to masked off.
+        static const Addr Addr_Mask = 0xf;
+
+        static Addr mask(Addr paddr) { return (paddr & ~Addr_Mask); }
+
+        Addr addr; 	// locked address
+        int cpuNum;	// locking CPU
+        int threadNum;	// locking thread ID within CPU
+
+        // check for matching execution context
+        bool matchesContext(Request *req)
+        {
+            return (cpuNum == req->getCpuNum() &&
+                    threadNum == req->getThreadNum());
+        }
+
+        LockedAddr(Request *req)
+            : addr(mask(req->getPaddr())),
+              cpuNum(req->getCpuNum()),
+              threadNum(req->getThreadNum())
+        {
+        }
+    };
+
+    std::list<LockedAddr> lockedAddrList;
+
+    // helper function for checkLockedAddrs(): we really want to
+    // inline a quick check for an empty locked addr list (hopefully
+    // the common case), and do the full list search (if necessary) in
+    // this out-of-line function
+    bool checkLockedAddrList(Request *req);
+
+    // Record the address of a load-locked operation so that we can
+    // clear the execution context's lock flag if a matching store is
+    // performed
+    void trackLoadLocked(Request *req);
+
+    // Compare a store address with any locked addresses so we can
+    // clear the lock flag appropriately.  Return value set to 'false'
+    // if store operation should be suppressed (because it was a
+    // conditional store and the address was no longer locked by the
+    // requesting execution context), 'true' otherwise.  Note that
+    // this method must be called on *all* stores since even
+    // non-conditional stores must clear any matching lock addresses.
+    bool writeOK(Request *req) {
+        if (lockedAddrList.empty()) {
+            // no locked addrs: nothing to check, store_conditional fails
+            bool isLocked = req->isLocked();
+            if (isLocked) {
+                req->setScResult(0);
+            }
+            return !isLocked; // only do write if not an sc
+        } else {
+            // iterate over list...
+            return checkLockedAddrList(req);
+        }
+    }
+
     uint8_t *pmemAddr;
     MemoryPort *port;
     int pagePtr;
diff --git a/src/mem/port.hh b/src/mem/port.hh
index 6b4184043..bb3bc1b1b 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -106,8 +106,7 @@ class Port
     /** Holds the ports status.  Currently just that a range recomputation needs
      * to be done. */
     enum Status {
-        RangeChange,
-        SnoopSquash
+        RangeChange
     };
 
     void setName(const std::string &name)
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 6acd7526c..e54984fcd 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -232,9 +232,11 @@ class Request
     Addr getPC() { assert(validPC); return pc; }
 
     /** Accessor Function to Check Cacheability. */
-    bool isUncacheable() { return getFlags() & UNCACHEABLE; }
+    bool isUncacheable() { return (getFlags() & UNCACHEABLE) != 0; }
 
-    bool isInstRead() { return getFlags() & INST_READ; }
+    bool isInstRead() { return (getFlags() & INST_READ) != 0; }
+
+    bool isLocked() { return (getFlags() & LOCKED) != 0; }
 
     friend class Packet;
 };
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 55c301c87..cef7a2a5b 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -47,9 +47,11 @@ SimpleTimingPort::recvTiming(Packet *pkt)
     // if we ever added it back.
     assert(pkt->result != Packet::Nacked);
     Tick latency = recvAtomic(pkt);
-    // turn packet around to go back to requester
-    pkt->makeTimingResponse();
-    sendTimingLater(pkt, latency);
+    // turn packet around to go back to requester if response expected
+    if (pkt->needsResponse()) {
+        pkt->makeTimingResponse();
+        sendTimingLater(pkt, latency);
+    }
     return true;
 }
 
diff --git a/src/python/m5/objects/BaseCPU.py b/src/python/m5/objects/BaseCPU.py
index 0b887cceb..b6dc08e46 100644
--- a/src/python/m5/objects/BaseCPU.py
+++ b/src/python/m5/objects/BaseCPU.py
@@ -11,10 +11,11 @@ class BaseCPU(SimObject):
     mem = Param.MemObject("memory")
 
     system = Param.System(Parent.any, "system object")
+    cpu_id = Param.Int("CPU identifier")
+
     if build_env['FULL_SYSTEM']:
         dtb = Param.AlphaDTB(AlphaDTB(), "Data TLB")
         itb = Param.AlphaITB(AlphaITB(), "Instruction TLB")
-        cpu_id = Param.Int(-1, "CPU identifier")
     else:
         workload = VectorParam.Process("processes to run")