Diffstat (limited to 'src/mem')
-rw-r--r--  src/mem/bus.cc              |  86
-rw-r--r--  src/mem/bus.hh              |  14
-rw-r--r--  src/mem/cache/base_cache.hh |  20
-rw-r--r--  src/mem/cache/cache_impl.hh |  19
-rw-r--r--  src/mem/physical.cc         | 110
-rw-r--r--  src/mem/physical.hh         |  62
-rw-r--r--  src/mem/port.hh             |   3
-rw-r--r--  src/mem/request.hh          |   6
-rw-r--r--  src/mem/tport.cc            |   8
9 files changed, 213 insertions(+), 115 deletions(-)
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 4cd4dd71a..df85ee0d9 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -118,44 +118,32 @@ Bus::recvTiming(Packet *pkt)
     DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
 
-    Port *pktPort = interfaces[pkt->getSrc()];
-
-    // If the bus is busy, or other devices are in line ahead of the
-    // current one, put this device on the retry list.
-    if (tickNextIdle > curTick ||
-        (retryList.size() && pktPort != retryingPort)) {
-        addToRetryList(pktPort);
-        return false;
-    }
-
-    // If the bus is blocked, make the device wait.
-    if (!(port = findDestPort(pkt, pkt->getSrc()))) {
-        addToRetryList(pktPort);
-        return false;
-    }
-
-    // The packet will be sent. Figure out how long it occupies the bus.
-    int numCycles = 0;
-    // Requests need one cycle to send an address
-    if (pkt->isRequest())
-        numCycles++;
-    else if (pkt->isResponse() || pkt->hasData()) {
-        // If a packet has data, it needs ceil(size/width) cycles to send it
-        // We're using the "adding instead of dividing" trick again here
-        if (pkt->hasData()) {
-            int dataSize = pkt->getSize();
-            for (int transmitted = 0; transmitted < dataSize;
-                 transmitted += width) {
-                numCycles++;
-            }
-        } else {
-            // If the packet didn't have data, it must have been a response.
-            // Those use the bus for one cycle to send their data.
-            numCycles++;
-        }
-    }
-
-    occupyBus(numCycles);
-
+    short dest = pkt->getDest();
+    if (dest == Packet::Broadcast) {
+        if (timingSnoop(pkt)) {
+            pkt->flags |= SNOOP_COMMIT;
+            bool success = timingSnoop(pkt);
+            assert(success);
+            if (pkt->flags & SATISFIED) {
+                // Cache-to-cache transfer occurring
+                return true;
+            }
+            port = findPort(pkt->getAddr(), pkt->getSrc());
+        } else {
+            // Snoop didn't succeed
+            retryList.push_back(interfaces[pkt->getSrc()]);
+            return false;
+        }
+    } else {
+        assert(dest >= 0 && dest < interfaces.size());
+        assert(dest != pkt->getSrc()); // catch infinite loops
+        port = interfaces[dest];
+    }
+
     if (port->sendTiming(pkt)) {
         // Packet was successfully sent. Return true.
         // Also take care of retries
@@ -278,43 +266,21 @@ Bus::atomicSnoop(Packet *pkt)
 }
 
 bool
-Bus::timingSnoopPhase1(Packet *pkt)
+Bus::timingSnoop(Packet *pkt)
 {
     std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc());
     bool success = true;
 
     while (!ports.empty() && success)
     {
-        snoopCallbacks.push_back(ports.back());
         success = interfaces[ports.back()]->sendTiming(pkt);
         ports.pop_back();
     }
-    if (!success)
-    {
-        while (!snoopCallbacks.empty())
-        {
-            interfaces[snoopCallbacks.back()]->sendStatusChange(Port::SnoopSquash);
-            snoopCallbacks.pop_back();
-        }
-        return false;
-    }
-    return true;
-}
 
-void
-Bus::timingSnoopPhase2(Packet *pkt)
-{
-    bool success;
-    pkt->flags |= SNOOP_COMMIT;
-    while (!snoopCallbacks.empty())
-    {
-        success = interfaces[snoopCallbacks.back()]->sendTiming(pkt);
-        // We should not fail on snoop callbacks
-        assert(success);
-        snoopCallbacks.pop_back();
-    }
+    return success;
 }
 
+
 /** Function called by the port when the bus is receiving an Atomic
  * transaction.*/
 Tick
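The new recvTiming() above broadcasts each snoop twice: a trial pass with SNOOP_COMMIT clear, asking whether every snooper can accept the packet, and then a commit pass that is asserted never to fail (caches only act on the snoop once SNOOP_COMMIT is set, as the cache_impl.hh hunk below shows). A minimal standalone sketch of that probe-then-commit pattern, using a hypothetical Snooper type and flag value rather than gem5's actual Port API:

    #include <cassert>
    #include <vector>

    struct Packet { unsigned flags = 0; };
    static const unsigned SNOOP_COMMIT = 0x1;   // hypothetical flag value

    struct Snooper {
        bool blocked = false;                   // e.g. MSHRs full
        bool recvTiming(Packet &pkt) {
            if (blocked)
                return false;                   // refuse: trial pass aborts
            if (pkt.flags & SNOOP_COMMIT) {
                // ... perform the actual snoop state change here ...
            }
            return true;
        }
    };

    // Returns false if any snooper refused; mirrors Bus::timingSnoop().
    static bool timingSnoop(std::vector<Snooper> &snoopers, Packet &pkt) {
        for (auto &s : snoopers)
            if (!s.recvTiming(pkt))
                return false;
        return true;
    }

    static bool broadcast(std::vector<Snooper> &snoopers, Packet &pkt) {
        if (!timingSnoop(snoopers, pkt))  // pass 1: probe, no side effects
            return false;                 // someone blocked; caller retries
        pkt.flags |= SNOOP_COMMIT;        // pass 2: everyone said yes,
        bool success = timingSnoop(snoopers, pkt);  // so this cannot fail
        assert(success);
        return success;
    }

    int main() {
        std::vector<Snooper> caches(2);
        Packet pkt;
        bool ok = broadcast(caches, pkt);         // all idle: snoop commits
        caches[1].blocked = true;
        Packet pkt2;
        bool ok2 = broadcast(caches, pkt2);       // one blocked: no commit
        return (ok && !ok2) ? 0 : 1;
    }

Because the first pass has no side effects, a refusal leaves no partial state to roll back, which is what lets this commit delete the snoopCallbacks list and the SnoopSquash status change.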
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index f238f134d..f8a006911 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -69,9 +69,6 @@ class Bus : public MemObject
     AddrRangeList defaultRange;
     std::vector<DevMap> portSnoopList;
 
-    std::vector<int> snoopCallbacks;
-
     /** Function called by the port when the bus is receiving a Timing
      * transaction.*/
     bool recvTiming(Packet *pkt);
@@ -121,16 +118,11 @@ class Bus : public MemObject
     /** Snoop all relevant ports atomically. */
     void atomicSnoop(Packet *pkt);
 
-    /** Snoop for NACK and Blocked in phase 1
+    /** Call snoop on caches; be sure to set the SNOOP_COMMIT bit if you
+     * want the snoop to happen.
      * @return True if it succeeds.
      */
-    bool timingSnoopPhase1(Packet *pkt);
-
-    /** @todo Don't need to commit all snoops, just those that need it
-     * (register somehow). */
-    /** Commit all snoops now that we know if any of them would have
-     * blocked. */
-    void timingSnoopPhase2(Packet *pkt);
+    bool timingSnoop(Packet *pkt);
 
     /** Process address range request.
      * @param resp addresses that we can respond to
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index c69fb7fd5..2e92e7730 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -165,10 +165,6 @@ class BaseCache : public MemObject
                 memSidePort->sendStatusChange(Port::RangeChange);
             }
         }
-        else if (status == Port::SnoopSquash) {
-            assert(snoopPhase2);
-            snoopPhase2 = false;
-        }
     }
 
     virtual Packet *getPacket()
@@ -215,9 +211,6 @@ class BaseCache : public MemObject
 
     bool topLevelCache;
 
-    /** True if we are now in phase 2 of the snoop process. */
-    bool snoopPhase2;
-
     /** Stores time the cache blocked for statistics.
      */
     Tick blockedCycle;
@@ -523,8 +516,10 @@ class BaseCache : public MemObject
      */
     void respond(Packet *pkt, Tick time)
     {
-        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-        reqCpu->schedule(time);
+        if (pkt->needsResponse()) {
+            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+            reqCpu->schedule(time);
+        }
     }
 
     /**
@@ -537,8 +532,10 @@ class BaseCache : public MemObject
         if (!pkt->req->isUncacheable()) {
             missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
                 time - pkt->time;
         }
-        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-        reqCpu->schedule(time);
+        if (pkt->needsResponse()) {
+            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+            reqCpu->schedule(time);
+        }
     }
 
     /**
@@ -549,6 +546,7 @@ class BaseCache : public MemObject
     {
         // assert("Implement\n" && 0);
         // mi->respond(pkt, curTick + hitLatency);
+        assert(pkt->needsResponse());
         CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
         reqMem->schedule(time);
     }
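Both respond() variants above (and SimpleTimingPort::recvTiming() at the end of this change) now schedule a response event only when the packet actually expects one. A compressed illustration of the pattern, with a toy event queue standing in for gem5's scheduler and heap-allocated CacheEvent:

    #include <cstdio>
    #include <functional>
    #include <utility>
    #include <vector>

    typedef unsigned long Tick;

    struct Packet {
        bool needsResp;                 // false for e.g. a writeback
        bool needsResponse() const { return needsResp; }
    };

    // Toy stand-in for gem5's event queue.
    struct EventQueue {
        std::vector<std::pair<Tick, std::function<void()> > > events;
        void schedule(Tick when, std::function<void()> fn) {
            events.push_back(std::make_pair(when, fn));
        }
    };

    // Mirrors the new respond(): only packets that expect an answer get
    // a response event; anything else would allocate an event (and, in
    // gem5, a CacheEvent) that no requester is waiting for.
    void respond(EventQueue &eq, Packet &pkt, Tick time) {
        if (pkt.needsResponse())
            eq.schedule(time, [] { std::puts("response sent"); });
    }

    int main() {
        EventQueue eq;
        Packet read = {true};
        Packet writeback = {false};
        respond(eq, read, 10);       // schedules a response
        respond(eq, writeback, 10);  // schedules nothing
        return eq.events.size() == 1 ? 0 : 1;
    }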
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 46f4b0ebe..1f03065b6 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -60,7 +60,7 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
     if (isCpuSide)
     {
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             pkt->req->setScResult(1);
         }
         if (!(pkt->flags & SATISFIED)) {
@@ -72,16 +72,9 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
         if (pkt->isResponse())
             handleResponse(pkt);
         else {
-            // Check if we are in phase 1
-            if (!snoopPhase2) {
-                snoopPhase2 = true;
-            }
-            else {
-                // Check if we should do the snoop
-                if (pkt->flags & SNOOP_COMMIT)
-                    snoop(pkt);
-                snoopPhase2 = false;
-            }
+            // Check if we should do the snoop
+            if (pkt->flags & SNOOP_COMMIT)
+                snoop(pkt);
         }
     }
     return true;
@@ -95,7 +88,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
     if (isCpuSide)
     {
         // Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             pkt->req->setScResult(1);
         }
@@ -125,7 +118,7 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide)
             pkt->req->setThreadContext(0,0);
 
         // Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             assert("Can't handle LL/SC on functional path\n");
         }
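The "temporary solution" above has the cache optimistically mark every store-conditional as successful, leaving the real decision to the memory system. A sketch of that optimistic-default flow with simplified, hypothetical types (the real verdict is rendered by PhysicalMemory::checkLockedAddrList() in the next hunk):

    struct Request {
        bool locked;            // is this a store-conditional?
        int scResult;           // -1: undecided, 1: success, 0: failure

        Request(bool l) : locked(l), scResult(-1) {}
        bool isLocked() const { return locked; }
        void setScResult(int r) { scResult = r; }
    };

    // Cache side: assume the SC will succeed...
    void cacheSeesWrite(Request &req) {
        if (req.isLocked())
            req.setScResult(1);
    }

    // ...memory side: overturn that result if the lock was lost.
    void memorySeesWrite(Request &req, bool lockStillHeld) {
        if (req.isLocked())
            req.setScResult(lockStillHeld ? 1 : 0);
    }

    int main() {
        Request sc(true);
        cacheSeesWrite(sc);                            // assume success
        memorySeesWrite(sc, /*lockStillHeld=*/false);  // lock was lost
        return sc.scResult == 0 ? 0 : 1;               // CPU sees failure
    }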
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index 8fea733ec..070693442 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -110,28 +110,112 @@ PhysicalMemory::calculateLatency(Packet *pkt)
     return lat;
 }
 
+
+// Add load-locked to tracking list.  Should only be called if the
+// operation is a load and the LOCKED flag is set.
+void
+PhysicalMemory::trackLoadLocked(Request *req)
+{
+    Addr paddr = LockedAddr::mask(req->getPaddr());
+
+    // first we check if we already have a locked addr for this
+    // xc.  Since each xc only gets one, we just update the
+    // existing record with the new address.
+    list<LockedAddr>::iterator i;
+
+    for (i = lockedAddrList.begin(); i != lockedAddrList.end(); ++i) {
+        if (i->matchesContext(req)) {
+            DPRINTF(LLSC, "Modifying lock record: cpu %d thread %d addr %#x\n",
+                    req->getCpuNum(), req->getThreadNum(), paddr);
+            i->addr = paddr;
+            return;
+        }
+    }
+
+    // no record for this xc: need to allocate a new one
+    DPRINTF(LLSC, "Adding lock record: cpu %d thread %d addr %#x\n",
+            req->getCpuNum(), req->getThreadNum(), paddr);
+    lockedAddrList.push_front(LockedAddr(req));
+}
+
+
+// Called on *writes* only... both regular stores and
+// store-conditional operations.  Check for conventional stores which
+// conflict with locked addresses, and for success/failure of store
+// conditionals.
+bool
+PhysicalMemory::checkLockedAddrList(Request *req)
+{
+    Addr paddr = LockedAddr::mask(req->getPaddr());
+    bool isLocked = req->isLocked();
+
+    // Initialize return value.  Non-conditional stores always
+    // succeed.  Assume conditional stores will fail until proven
+    // otherwise.
+    bool success = !isLocked;
+
+    // Iterate over list.  Note that there could be multiple matching
+    // records, as more than one context could have done a load locked
+    // to this location.
+    list<LockedAddr>::iterator i = lockedAddrList.begin();
+
+    while (i != lockedAddrList.end()) {
+
+        if (i->addr == paddr) {
+            // we have a matching address
+
+            if (isLocked && i->matchesContext(req)) {
+                // it's a store conditional, and as far as the memory
+                // system can tell, the requesting context's lock is
+                // still valid.
+                DPRINTF(LLSC, "StCond success: cpu %d thread %d addr %#x\n",
+                        req->getCpuNum(), req->getThreadNum(), paddr);
+                success = true;
+            }
+
+            // Get rid of our record of this lock and advance to next
+            DPRINTF(LLSC, "Erasing lock record: cpu %d thread %d addr %#x\n",
+                    i->cpuNum, i->threadNum, paddr);
+            i = lockedAddrList.erase(i);
+        }
+        else {
+            // no match: advance to next record
+            ++i;
+        }
+    }
+
+    if (isLocked) {
+        req->setScResult(success ? 1 : 0);
+    }
+
+    return success;
+}
+
 void
 PhysicalMemory::doFunctionalAccess(Packet *pkt)
 {
     assert(pkt->getAddr() + pkt->getSize() < params()->addrRange.size());
 
-    switch (pkt->cmd) {
-      case Packet::ReadReq:
+    if (pkt->isRead()) {
+        if (pkt->req->isLocked()) {
+            trackLoadLocked(pkt->req);
+        }
         memcpy(pkt->getPtr<uint8_t>(),
                pmemAddr + pkt->getAddr() - params()->addrRange.start,
                pkt->getSize());
-        break;
-      case Packet::WriteReq:
-        memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start,
-               pkt->getPtr<uint8_t>(),
-               pkt->getSize());
-        // temporary hack: will need to add real LL/SC implementation
-        // for cacheless systems later.
-        if (pkt->req->getFlags() & LOCKED) {
-            pkt->req->setScResult(1);
-        }
-        break;
-      default:
+    }
+    else if (pkt->isWrite()) {
+        if (writeOK(pkt->req)) {
+            memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start,
+                   pkt->getPtr<uint8_t>(), pkt->getSize());
+        }
+    }
+    else if (pkt->isInvalidate()) {
+        // upgrade or invalidate
+        pkt->flags |= SATISFIED;
+    }
+    else {
         panic("unimplemented");
     }
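A self-contained model of the lock-tracking scheme that trackLoadLocked() and checkLockedAddrList() implement, using simplified stand-ins (a hypothetical LockTracker class and a flattened Request) rather than gem5's actual Request accessors; the real versions live on PhysicalMemory, declared in physical.hh below:

    #include <cstdint>
    #include <list>

    typedef uint64_t Addr;

    struct Request {
        Addr paddr;
        int cpuNum, threadNum;
        bool locked;                 // LL or SC flavor of the access
        int scResult;
        Request(Addr a, int c, int t, bool l)
            : paddr(a), cpuNum(c), threadNum(t), locked(l), scResult(-1) {}
    };

    class LockTracker {
        static const Addr AddrMask = 0xf;      // 16-byte LL/SC granularity
        static Addr mask(Addr a) { return a & ~AddrMask; }

        struct LockedAddr { Addr addr; int cpuNum, threadNum; };
        std::list<LockedAddr> locks;

      public:
        // Load-locked: (re)register this context's single lock address.
        void loadLocked(const Request &req) {
            for (std::list<LockedAddr>::iterator i = locks.begin();
                 i != locks.end(); ++i) {
                if (i->cpuNum == req.cpuNum && i->threadNum == req.threadNum) {
                    i->addr = mask(req.paddr); // one lock per context
                    return;
                }
            }
            LockedAddr l = { mask(req.paddr), req.cpuNum, req.threadNum };
            locks.push_front(l);
        }

        // Any store: erase every matching lock; an SC succeeds only if
        // its own context's lock was still live.  Returns false if the
        // write must be suppressed (a failed SC).
        bool write(Request &req) {
            Addr paddr = mask(req.paddr);
            bool success = !req.locked;        // plain stores always proceed
            for (std::list<LockedAddr>::iterator i = locks.begin();
                 i != locks.end(); ) {
                if (i->addr == paddr) {
                    if (req.locked && i->cpuNum == req.cpuNum &&
                        i->threadNum == req.threadNum)
                        success = true;
                    i = locks.erase(i);        // any store clears the lock
                } else {
                    ++i;
                }
            }
            if (req.locked)
                req.scResult = success ? 1 : 0;
            return success;
        }
    };

    int main() {
        LockTracker mem;
        Request ll(0x1000, /*cpu=*/0, /*thread=*/0, /*locked=*/true);
        mem.loadLocked(ll);                  // cpu 0 locks block 0x1000

        Request store(0x1008, 1, 0, false);  // same 16-byte block, other cpu
        mem.write(store);                    // clears cpu 0's lock

        Request sc(0x1000, 0, 0, true);
        bool ok = mem.write(sc);             // SC now fails
        return (!ok && sc.scResult == 0) ? 0 : 1;
    }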
diff --git a/src/mem/physical.hh b/src/mem/physical.hh
index 02308b2ef..97bea2ec4 100644
--- a/src/mem/physical.hh
+++ b/src/mem/physical.hh
@@ -78,6 +78,68 @@ class PhysicalMemory : public MemObject
     const PhysicalMemory &operator=(const PhysicalMemory &specmem);
 
   protected:
+
+    class LockedAddr {
+      public:
+        // on alpha, minimum LL/SC granularity is 16 bytes, so the lower
+        // bits need to be masked off.
+        static const Addr Addr_Mask = 0xf;
+
+        static Addr mask(Addr paddr) { return (paddr & ~Addr_Mask); }
+
+        Addr addr;      // locked address
+        int cpuNum;     // locking CPU
+        int threadNum;  // locking thread ID within CPU
+
+        // check for matching execution context
+        bool matchesContext(Request *req)
+        {
+            return (cpuNum == req->getCpuNum() &&
+                    threadNum == req->getThreadNum());
+        }
+
+        LockedAddr(Request *req)
+            : addr(mask(req->getPaddr())),
+              cpuNum(req->getCpuNum()),
+              threadNum(req->getThreadNum())
+        {
+        }
+    };
+
+    std::list<LockedAddr> lockedAddrList;
+
+    // helper function for checkLockedAddrs(): we really want to
+    // inline a quick check for an empty locked addr list (hopefully
+    // the common case), and do the full list search (if necessary) in
+    // this out-of-line function
+    bool checkLockedAddrList(Request *req);
+
+    // Record the address of a load-locked operation so that we can
+    // clear the execution context's lock flag if a matching store is
+    // performed
+    void trackLoadLocked(Request *req);
+
+    // Compare a store address with any locked addresses so we can
+    // clear the lock flag appropriately.  Return value set to 'false'
+    // if the store operation should be suppressed (because it was a
+    // conditional store and the address was no longer locked by the
+    // requesting execution context), 'true' otherwise.  Note that
+    // this method must be called on *all* stores since even
+    // non-conditional stores must clear any matching lock addresses.
+    bool writeOK(Request *req) {
+        if (lockedAddrList.empty()) {
+            // no locked addrs: nothing to check, store conditional fails
+            bool isLocked = req->isLocked();
+            if (isLocked) {
+                req->setScResult(0);
+            }
+            return !isLocked; // only do the write if not an SC
+        } else {
+            // iterate over list...
+            return checkLockedAddrList(req);
+        }
+    }
+
     uint8_t *pmemAddr;
     MemoryPort *port;
     int pagePtr;
diff --git a/src/mem/port.hh b/src/mem/port.hh
index 6b4184043..bb3bc1b1b 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -106,8 +106,7 @@ class Port
     /** Holds the port's status.  Currently just that a range
      * recomputation needs to be done. */
     enum Status {
-        RangeChange,
-        SnoopSquash
+        RangeChange
     };
 
     void setName(const std::string &name)
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 6acd7526c..e54984fcd 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -232,9 +232,11 @@ class Request
     Addr getPC() { assert(validPC); return pc; }
 
     /** Accessor function to check cacheability. */
-    bool isUncacheable() { return getFlags() & UNCACHEABLE; }
+    bool isUncacheable() { return (getFlags() & UNCACHEABLE) != 0; }
 
-    bool isInstRead() { return getFlags() & INST_READ; }
+    bool isInstRead() { return (getFlags() & INST_READ) != 0; }
+
+    bool isLocked() { return (getFlags() & LOCKED) != 0; }
 
     friend class Packet;
 };
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 55c301c87..cef7a2a5b 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -47,9 +47,11 @@ SimpleTimingPort::recvTiming(Packet *pkt)
     // if we ever added it back.
     assert(pkt->result != Packet::Nacked);
     Tick latency = recvAtomic(pkt);
-    // turn packet around to go back to requester
-    pkt->makeTimingResponse();
-    sendTimingLater(pkt, latency);
+    // turn packet around to go back to requester if response expected
+    if (pkt->needsResponse()) {
+        pkt->makeTimingResponse();
+        sendTimingLater(pkt, latency);
+    }
     return true;
 }
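The request.hh hunk above turns raw bit tests into named predicates, with an explicit comparison against zero. A standalone sketch of that accessor pattern, using hypothetical flag values (gem5's actual encodings differ):

    #include <cstdint>

    class Request {
        uint32_t flags;
      public:
        // Hypothetical flag values, for illustration only.
        static const uint32_t UNCACHEABLE = 0x001;
        static const uint32_t INST_READ   = 0x002;
        static const uint32_t LOCKED      = 0x004;

        explicit Request(uint32_t f) : flags(f) {}
        uint32_t getFlags() const { return flags; }

        // The explicit "!= 0" documents the int-to-bool narrowing and
        // silences "forcing value to bool" warnings on some compilers;
        // callers also no longer need to know the flag names.
        bool isUncacheable() const { return (getFlags() & UNCACHEABLE) != 0; }
        bool isInstRead()   const { return (getFlags() & INST_READ) != 0; }
        bool isLocked()     const { return (getFlags() & LOCKED) != 0; }
    };

    int main() {
        Request req(Request::LOCKED | Request::UNCACHEABLE);
        bool ok = req.isLocked() && req.isUncacheable() && !req.isInstRead();
        return ok ? 0 : 1;
    }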