4 files changed, 106 insertions, 40 deletions
diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py
index 3853ebbb4..ef110d682 100644
--- a/configs/ruby/MOESI_CMP_token.py
+++ b/configs/ruby/MOESI_CMP_token.py
@@ -51,7 +51,9 @@ def define_options(parser):
                       help="Token_CMP: cycles until issuing again");
     parser.add_option("--disable-dyn-timeouts", action="store_true",
           help="Token_CMP: disable dyanimc timeouts, use fixed latency instead")
-
+    parser.add_option("--allow-atomic-migration", action="store_true",
+          help="allow migratory sharing for atomic only accessed blocks")
+
 def create_system(options, system, piobus, dma_devices):
     
     if buildEnv['PROTOCOL'] != 'MOESI_CMP_token':
@@ -111,7 +113,9 @@ def create_system(options, system, piobus, dma_devices):
                                       fixed_timeout_latency = \
                                         options.timeout_latency,
                                       dynamic_timeout_enabled = \
-                                        not options.disable_dyn_timeouts)
+                                        not options.disable_dyn_timeouts,
+                                      no_mig_atomic = not \
+                                        options.allow_atomic_migration)
 
         exec("system.l1_cntrl%d = l1_cntrl" % i)
         #
diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py
index 818600a5a..02d958b09 100644
--- a/configs/ruby/MOESI_hammer.py
+++ b/configs/ruby/MOESI_hammer.py
@@ -44,7 +44,8 @@ class L2Cache(RubyCache):
     latency = 10
 
 def define_options(parser):
-    return
+    parser.add_option("--allow-atomic-migration", action="store_true",
+          help="allow migratory sharing for atomic only accessed blocks")
 
 def create_system(options, system, piobus, dma_devices):
     
@@ -91,7 +92,9 @@ def create_system(options, system, piobus, dma_devices):
                                       sequencer = cpu_seq,
                                       L1IcacheMemory = l1i_cache,
                                       L1DcacheMemory = l1d_cache,
-                                      L2cacheMemory = l2_cache)
+                                      L2cacheMemory = l2_cache,
+                                      no_mig_atomic = not \
+                                        options.allow_atomic_migration)
 
         exec("system.l1_cntrl%d = l1_cntrl" % i)
         #
diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm
index d3e993efa..7a234e56f 100644
--- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm
+++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm
@@ -42,7 +42,8 @@ machine(L1Cache, "Token protocol")
    int l1_response_latency = 2,
    int retry_threshold = 1,
    int fixed_timeout_latency = 100,
-   bool dynamic_timeout_enabled = true
+   bool dynamic_timeout_enabled = true,
+   bool no_mig_atomic = true
 {
 
   // From this node's L1 cache TO the network
@@ -92,6 +93,7 @@ machine(L1Cache, "Token protocol")
     Load,            desc="Load request from the processor";
     Ifetch,          desc="I-fetch request from the processor";
     Store,           desc="Store request from the processor";
+    Atomic,          desc="Atomic request from the processor";
     L1_Replacement,  desc="L1 Replacement";
 
     // Responses
@@ -120,7 +122,7 @@ machine(L1Cache, "Token protocol")
     Use_TimeoutStarverX,             desc="Timeout";
     Use_TimeoutStarverS,             desc="Timeout";
     Use_TimeoutNoStarvers,             desc="Timeout";
-
+    Use_TimeoutNoStarvers_NoMig,     desc="Timeout Don't Migrate";
   }
 
   // TYPES
@@ -143,6 +145,7 @@ machine(L1Cache, "Token protocol")
 
     bool WentPersistent, default="false",  desc="Request went persistent";
     bool ExternalResponse, default="false", desc="Response came from an external controller";
+    bool IsAtomic, default="false",       desc="Request was an atomic request";
 
     AccessType AccessType,                desc="Type of request (used for profiling)";
     Time IssueTime,                       desc="Time the request was issued";
@@ -361,8 +364,14 @@ machine(L1Cache, "Token protocol")
       return Event:Load;
     } else if (type == CacheRequestType:IFETCH) {
       return Event:Ifetch;
-    } else if ((type == CacheRequestType:ST) || (type == CacheRequestType:ATOMIC)) {
+    } else if (type == CacheRequestType:ST) {
       return Event:Store;
+    } else if (type == CacheRequestType:ATOMIC) {
+      if (no_mig_atomic) {
+        return Event:Atomic;
+      } else {
+        return Event:Store;
+      }
     } else {
       error("Invalid CacheRequestType");
     }
@@ -422,13 +431,16 @@ machine(L1Cache, "Token protocol")
       if (persistentTable.isLocked(useTimerTable.readyAddress()) && (persistentTable.findSmallest(useTimerTable.readyAddress()) != machineID)) {
         if (persistentTable.typeOfSmallest(useTimerTable.readyAddress()) == AccessType:Write) {
           trigger(Event:Use_TimeoutStarverX, useTimerTable.readyAddress());
-        }
-        else {
+        } else {
           trigger(Event:Use_TimeoutStarverS, useTimerTable.readyAddress());
         }
-      }
-      else {
-        trigger(Event:Use_TimeoutNoStarvers, useTimerTable.readyAddress());
+      } else {
+        assert(L1_TBEs.isPresent(useTimerTable.readyAddress()));
+        if (no_mig_atomic && L1_TBEs[useTimerTable.readyAddress()].IsAtomic) {
+          trigger(Event:Use_TimeoutNoStarvers_NoMig, useTimerTable.readyAddress());
+        } else {
+          trigger(Event:Use_TimeoutNoStarvers, useTimerTable.readyAddress());
+        }
       }
     }
   }
@@ -1245,6 +1257,9 @@ machine(L1Cache, "Token protocol")
     peek(mandatoryQueue_in, CacheMsg) {
       L1_TBEs[address].PC := in_msg.ProgramCounter;
       L1_TBEs[address].AccessType := cache_request_type_to_access_type(in_msg.Type);
+      if (in_msg.Type == CacheRequestType:ATOMIC) {
+        L1_TBEs[address].IsAtomic := true;
+      }
       L1_TBEs[address].Prefetch := in_msg.Prefetch;
       L1_TBEs[address].AccessMode := in_msg.AccessMode;
     }
@@ -1444,7 +1459,7 @@ machine(L1Cache, "Token protocol")
     zz_recycleMandatoryQueue;
   }
 
-  transition({IM, SM, OM, IS, IM_L, IS_L, SM_L}, Store) {
+  transition({IM, SM, OM, IS, IM_L, IS_L, SM_L}, {Store, Atomic}) {
     zz_recycleMandatoryQueue;
   }
 
@@ -1475,7 +1490,7 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition(NP, Store, IM) {
+  transition(NP, {Store, Atomic}, IM) {
     ii_allocateL1DCacheBlock;
     i_allocateTBE;
     b_issueWriteRequest;
@@ -1511,7 +1526,7 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition(I, Store, IM) {
+  transition(I, {Store, Atomic}, IM) {
     i_allocateTBE;
     b_issueWriteRequest;
     uu_profileMiss;
@@ -1570,7 +1585,7 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition(S, Store, SM) {
+  transition(S, {Store, Atomic}, SM) {
     i_allocateTBE;
     b_issueWriteRequest;
     uu_profileMiss;
@@ -1646,7 +1661,7 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition(O, Store, OM) {
+  transition(O, {Store, Atomic}, OM) {
     i_allocateTBE;
     b_issueWriteRequest;
     uu_profileMiss;
@@ -1723,7 +1738,17 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition({MM, MM_W}, Store) {
+  transition({MM_W}, {Store, Atomic}) {
+    hh_store_hit;
+    k_popMandatoryQueue;
+  }
+
+  transition(MM, Store) {
+    hh_store_hit;
+    k_popMandatoryQueue;
+  }
+
+  transition(MM, Atomic, M) {
     hh_store_hit;
     k_popMandatoryQueue;
   }
@@ -1755,12 +1780,16 @@ machine(L1Cache, "Token protocol")
     l_popPersistentQueue;
   }
 
-
   transition(MM_W, Use_TimeoutNoStarvers, MM) {
     s_deallocateTBE;
     jj_unsetUseTimer;
   }
 
+  transition(MM_W, Use_TimeoutNoStarvers_NoMig, M) {
+    s_deallocateTBE;
+    jj_unsetUseTimer;
+  }
+
   // Transitions from Dirty Exclusive
   transition({M, M_W}, {Load, Ifetch}) {
     h_load_hit;
@@ -1772,11 +1801,21 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
+  transition(M, Atomic) {
+    hh_store_hit;
+    k_popMandatoryQueue;
+  }
+
   transition(M_W, Store, MM_W) {
     hh_store_hit;
     k_popMandatoryQueue;
   }
 
+  transition(M_W, Atomic) {
+    hh_store_hit;
+    k_popMandatoryQueue;
+  }
+
   transition(M, L1_Replacement, I) {
     c_ownedReplacement;
     gg_deallocateL1CacheBlock;
@@ -1825,7 +1864,7 @@ machine(L1Cache, "Token protocol")
   }
 
   // someone unlocked during timeout
-  transition(M_W, Use_TimeoutNoStarvers, M) {
+  transition(M_W, {Use_TimeoutNoStarvers, Use_TimeoutNoStarvers_NoMig}, M) {
     s_deallocateTBE;
     jj_unsetUseTimer;
   }
@@ -2065,7 +2104,7 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition(I_L, Store, IM_L) {
+  transition(I_L, {Store, Atomic}, IM_L) {
     ii_allocateL1DCacheBlock;
     i_allocateTBE;
     b_issueWriteRequest;
@@ -2076,7 +2115,7 @@ machine(L1Cache, "Token protocol")
 
   // Transitions from S_L
 
-  transition(S_L, Store, SM_L) {
+  transition(S_L, {Store, Atomic}, SM_L) {
     i_allocateTBE;
     b_issueWriteRequest;
     uu_profileMiss;
diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm
index 44ae479c7..7b49c075c 100644
--- a/src/mem/protocol/MOESI_hammer-cache.sm
+++ b/src/mem/protocol/MOESI_hammer-cache.sm
@@ -40,7 +40,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
   CacheMemory * L2cacheMemory,
   int cache_response_latency = 10,
   int issue_latency = 2,
-  int l2_cache_hit_latency = 10
+  int l2_cache_hit_latency = 10,
+  bool no_mig_atomic = true
 {
 
   // NETWORK BUFFERS
@@ -94,6 +95,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     // Requests
     Other_GETX,      desc="A GetX from another processor";
     Other_GETS,      desc="A GetS from another processor";
+    Other_GETS_No_Mig, desc="A GetS from another processor, do not migrate the block";
 
     // Responses
     Ack,             desc="Received an ack message";
@@ -122,6 +124,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     bool Dirty,              desc="Is the data dirty (different than memory)?";
     DataBlock DataBlk,       desc="data for the block";
     bool FromL2, default="false", desc="block just moved from L2";
+    bool AtomicAccessed, default="false", desc="block was accessed by an atomic request";
   }
 
   // TBE fields
@@ -280,7 +283,15 @@ machine(L1Cache, "AMD Hammer-like protocol")
         if (in_msg.Type == CoherenceRequestType:GETX) {
           trigger(Event:Other_GETX, in_msg.Address);
         } else if (in_msg.Type == CoherenceRequestType:GETS) {
-          trigger(Event:Other_GETS, in_msg.Address);
+          if (isCacheTagPresent(in_msg.Address)) {
+            if (getCacheEntry(in_msg.Address).AtomicAccessed && no_mig_atomic) {
+              trigger(Event:Other_GETS_No_Mig, in_msg.Address);
+            } else {
+              trigger(Event:Other_GETS, in_msg.Address);
+            }
+          } else {
+            trigger(Event:Other_GETS, in_msg.Address);
+          }
         } else if (in_msg.Type == CoherenceRequestType:WB_ACK) {
           trigger(Event:Writeback_Ack, in_msg.Address);
         } else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
@@ -538,12 +549,16 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") {
     DEBUG_EXPR(getCacheEntry(address).DataBlk);
+    peek(mandatoryQueue_in, CacheMsg) {
+      sequencer.writeCallback(address, 
+                              testAndClearLocalHit(address), 
+                              getCacheEntry(address).DataBlk);
 
-    sequencer.writeCallback(address, 
-                            testAndClearLocalHit(address), 
-                            getCacheEntry(address).DataBlk);
-
-    getCacheEntry(address).Dirty := true;
+      getCacheEntry(address).Dirty := true;
+      if (in_msg.Type == CacheRequestType:ATOMIC) {
+        getCacheEntry(address).AtomicAccessed := true;
+      }
+    }
   }
 
   action(sx_external_store_hit, "sx", desc="store required external msgs.") {
@@ -798,7 +813,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     zz_recycleMandatoryQueue;
   }
 
-  transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS}) {
+  transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
     // stall
   }
 
@@ -948,7 +963,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     rr_deallocateL2CacheBlock;
   }
 
-  transition(I, {Other_GETX, Other_GETS}) {
+  transition(I, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
     f_sendAck;
     l_popForwardQueue;
   }
@@ -975,7 +990,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(S, Other_GETS) {
+  transition(S, {Other_GETS, Other_GETS_No_Mig}) {
     ff_sendAckShared;
     l_popForwardQueue;
   }
@@ -1005,7 +1020,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(O, Other_GETS) {
+  transition(O, {Other_GETS, Other_GETS_No_Mig}) {
     ee_sendDataShared;
     l_popForwardQueue;
   }
@@ -1037,6 +1052,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
   
+  transition(MM, Other_GETS_No_Mig, O) {
+    ee_sendDataShared;
+    l_popForwardQueue;
+  }
+  
   // Transitions from Dirty Exclusive
   transition(M, {Load, Ifetch}) {
     h_load_hit;
@@ -1059,14 +1079,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(M, Other_GETS, O) {
+  transition(M, {Other_GETS, Other_GETS_No_Mig}, O) {
     ee_sendDataShared;
     l_popForwardQueue;
   }
 
   // Transitions from IM
 
-  transition(IM, {Other_GETX, Other_GETS}) {
+  transition(IM, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
     f_sendAck;
     l_popForwardQueue;
   }
@@ -1093,7 +1113,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   // Transitions from SM
-  transition(SM, Other_GETS) {
+  transition(SM, {Other_GETS, Other_GETS_No_Mig}) {
     ff_sendAckShared;
     l_popForwardQueue;
   }
@@ -1138,7 +1158,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(OM, Other_GETS) {
+  transition(OM, {Other_GETS, Other_GETS_No_Mig}) {
     ee_sendDataShared;
     l_popForwardQueue;
   }
@@ -1158,7 +1178,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   // Transitions from IS
 
-  transition(IS, {Other_GETX, Other_GETS}) {
+  transition(IS, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
     f_sendAck;
     l_popForwardQueue;
   }
@@ -1274,7 +1294,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition({OI, MI}, Other_GETS, OI) {
+  transition({OI, MI}, {Other_GETS, Other_GETS_No_Mig}, OI) {
     q_sendDataFromTBEToCache;
     l_popForwardQueue;
   }
@@ -1292,7 +1312,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   // Transitions from II
-  transition(II, {Other_GETS, Other_GETX}, II) {
+  transition(II, {Other_GETS, Other_GETS_No_Mig, Other_GETX}, II) {
     f_sendAck;
     l_popForwardQueue;
   }