ruby: Disable migratory sharing for token and hammer

This patch allows one to disable migratory sharing for those cache blocks that are accessed by atomic requests. While the implementations are different between the token and hammer protocols, the motivation is the same. For Alpha, LLSC semantics expect that normal loads do not unlock cache blocks that have been locked by LL accesses. Therefore, locked blocks should not transfer write permissions when responding to these load requests. Instead, they only transfer read permissions so that the subsequent SC access can possibly succeed.
author: Brad Beckmann <Brad.Beckmann@amd.com> 2010-08-20 11:46:13 -0700
committer: Brad Beckmann <Brad.Beckmann@amd.com> 2010-08-20 11:46:13 -0700
commit: 72044e3f5a5b9455e07180806793127be2014451 (patch)
tree: fe2e7f7f129a31dccf84ee2d0bfc93d9c76b7f07
parent: bcdd19df03ed047e4ce03dbf81bc40173cc3217b (diff)
download: gem5-72044e3f5a5b9455e07180806793127be2014451.tar.xz
4 files changed, 106 insertions, 40 deletions
diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py
index 3853ebbb4..ef110d682 100644
--- a/configs/ruby/MOESI_CMP_token.py
+++ b/configs/ruby/MOESI_CMP_token.py
@@ -51,7 +51,9 @@ def define_options(parser):
                       help="Token_CMP: cycles until issuing again");
     parser.add_option("--disable-dyn-timeouts", action="store_true",
           help="Token_CMP: disable dyanimc timeouts, use fixed latency instead")
-
+    parser.add_option("--allow-atomic-migration", action="store_true",
+          help="allow migratory sharing for atomic only accessed blocks")
+    
 def create_system(options, system, piobus, dma_devices):
     
     if buildEnv['PROTOCOL'] != 'MOESI_CMP_token':
@@ -111,7 +113,9 @@ def create_system(options, system, piobus, dma_devices):
                                       fixed_timeout_latency = \
                                         options.timeout_latency,
                                       dynamic_timeout_enabled = \
-                                        not options.disable_dyn_timeouts)
+                                        not options.disable_dyn_timeouts,
+                                      no_mig_atomic = not \
+                                        options.allow_atomic_migration)
 
         exec("system.l1_cntrl%d = l1_cntrl" % i)
         #
diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py
index 818600a5a..02d958b09 100644
--- a/configs/ruby/MOESI_hammer.py
+++ b/configs/ruby/MOESI_hammer.py
@@ -44,7 +44,8 @@ class L2Cache(RubyCache):
     latency = 10
 
 def define_options(parser):
-    return
+    parser.add_option("--allow-atomic-migration", action="store_true",
+          help="allow migratory sharing for atomic only accessed blocks")
 
 def create_system(options, system, piobus, dma_devices):
     
@@ -91,7 +92,9 @@ def create_system(options, system, piobus, dma_devices):
                                       sequencer = cpu_seq,
                                       L1IcacheMemory = l1i_cache,
                                       L1DcacheMemory = l1d_cache,
-                                      L2cacheMemory = l2_cache)
+                                      L2cacheMemory = l2_cache,
+                                      no_mig_atomic = not \
+                                        options.allow_atomic_migration)
 
         exec("system.l1_cntrl%d = l1_cntrl" % i)
         #
diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm
index d3e993efa..7a234e56f 100644
--- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm
+++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm
@@ -42,7 +42,8 @@ machine(L1Cache, "Token protocol")
    int l1_response_latency = 2,
    int retry_threshold = 1,
    int fixed_timeout_latency = 100,
-   bool dynamic_timeout_enabled = true
+   bool dynamic_timeout_enabled = true,
+   bool no_mig_atomic = true
 {
 
   // From this node's L1 cache TO the network
@@ -92,6 +93,7 @@ machine(L1Cache, "Token protocol")
     Load,            desc="Load request from the processor";
     Ifetch,          desc="I-fetch request from the processor";
     Store,           desc="Store request from the processor";
+    Atomic,          desc="Atomic request from the processor";
     L1_Replacement,  desc="L1 Replacement";
 
     // Responses
@@ -120,7 +122,7 @@ machine(L1Cache, "Token protocol")
     Use_TimeoutStarverX,             desc="Timeout";
     Use_TimeoutStarverS,             desc="Timeout";
     Use_TimeoutNoStarvers,             desc="Timeout";
-
+    Use_TimeoutNoStarvers_NoMig,     desc="Timeout Don't Migrate";
   }
 
   // TYPES
@@ -143,6 +145,7 @@ machine(L1Cache, "Token protocol")
 
     bool WentPersistent, default="false",  desc="Request went persistent";
     bool ExternalResponse, default="false", desc="Response came from an external controller";
+    bool IsAtomic, default="false",       desc="Request was an atomic request";
 
     AccessType AccessType,                desc="Type of request (used for profiling)";
     Time IssueTime,                       desc="Time the request was issued";
@@ -361,8 +364,14 @@ machine(L1Cache, "Token protocol")
       return Event:Load;
     } else if (type == CacheRequestType:IFETCH) {
       return Event:Ifetch;
-    } else if ((type == CacheRequestType:ST) || (type == CacheRequestType:ATOMIC)) {
+    } else if (type == CacheRequestType:ST) {
       return Event:Store;
+    } else if (type == CacheRequestType:ATOMIC) {
+      if (no_mig_atomic) {
+        return Event:Atomic;
+      } else {
+        return Event:Store;
+      }
     } else {
       error("Invalid CacheRequestType");
     }
@@ -422,13 +431,16 @@ machine(L1Cache, "Token protocol")
       if (persistentTable.isLocked(useTimerTable.readyAddress()) && (persistentTable.findSmallest(useTimerTable.readyAddress()) != machineID)) {
         if (persistentTable.typeOfSmallest(useTimerTable.readyAddress()) == AccessType:Write) {
           trigger(Event:Use_TimeoutStarverX, useTimerTable.readyAddress());
-        }
-        else {
+        } else {
           trigger(Event:Use_TimeoutStarverS, useTimerTable.readyAddress());
         }
-      }
-      else {
-        trigger(Event:Use_TimeoutNoStarvers, useTimerTable.readyAddress());
+      } else {
+        assert(L1_TBEs.isPresent(useTimerTable.readyAddress()));
+        if (no_mig_atomic && L1_TBEs[useTimerTable.readyAddress()].IsAtomic) {
+          trigger(Event:Use_TimeoutNoStarvers_NoMig, useTimerTable.readyAddress());
+        } else {
+          trigger(Event:Use_TimeoutNoStarvers, useTimerTable.readyAddress());
+        }
       }
     }
   }
@@ -1245,6 +1257,9 @@ machine(L1Cache, "Token protocol")
     peek(mandatoryQueue_in, CacheMsg) {
       L1_TBEs[address].PC := in_msg.ProgramCounter;
       L1_TBEs[address].AccessType := cache_request_type_to_access_type(in_msg.Type);
+      if (in_msg.Type == CacheRequestType:ATOMIC) {
+        L1_TBEs[address].IsAtomic := true;
+      }
       L1_TBEs[address].Prefetch := in_msg.Prefetch;
       L1_TBEs[address].AccessMode := in_msg.AccessMode;
     }
@@ -1444,7 +1459,7 @@ machine(L1Cache, "Token protocol")
     zz_recycleMandatoryQueue;
   }
 
-  transition({IM, SM, OM, IS, IM_L, IS_L, SM_L}, Store) {
+  transition({IM, SM, OM, IS, IM_L, IS_L, SM_L}, {Store, Atomic}) {
     zz_recycleMandatoryQueue;
   }
 
@@ -1475,7 +1490,7 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition(NP, Store, IM) {
+  transition(NP, {Store, Atomic}, IM) {
     ii_allocateL1DCacheBlock;
     i_allocateTBE;
     b_issueWriteRequest;
@@ -1511,7 +1526,7 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition(I, Store, IM) {
+  transition(I, {Store, Atomic}, IM) {
     i_allocateTBE;
     b_issueWriteRequest;
     uu_profileMiss;
@@ -1570,7 +1585,7 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition(S, Store, SM) {
+  transition(S, {Store, Atomic}, SM) {
     i_allocateTBE;
     b_issueWriteRequest;
     uu_profileMiss;
@@ -1646,7 +1661,7 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition(O, Store, OM) {
+  transition(O, {Store, Atomic}, OM) {
     i_allocateTBE;
     b_issueWriteRequest;
     uu_profileMiss;
@@ -1723,7 +1738,17 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition({MM, MM_W}, Store) {
+  transition({MM_W}, {Store, Atomic}) {
+    hh_store_hit;
+    k_popMandatoryQueue;
+  }
+
+  transition(MM, Store) {
+    hh_store_hit;
+    k_popMandatoryQueue;
+  }
+
+  transition(MM, Atomic, M) {
     hh_store_hit;
     k_popMandatoryQueue;
   }
@@ -1755,12 +1780,16 @@ machine(L1Cache, "Token protocol")
     l_popPersistentQueue;
   }
 
-
   transition(MM_W, Use_TimeoutNoStarvers, MM) {
     s_deallocateTBE;
     jj_unsetUseTimer;
   }
 
+  transition(MM_W, Use_TimeoutNoStarvers_NoMig, M) {
+    s_deallocateTBE;
+    jj_unsetUseTimer;
+  }
+
   // Transitions from Dirty Exclusive
   transition({M, M_W}, {Load, Ifetch}) {
     h_load_hit;
@@ -1772,11 +1801,21 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
+  transition(M, Atomic) {
+    hh_store_hit;
+    k_popMandatoryQueue;
+  }
+
   transition(M_W, Store, MM_W) {
     hh_store_hit;
     k_popMandatoryQueue;
   }
 
+  transition(M_W, Atomic) {
+    hh_store_hit;
+    k_popMandatoryQueue;
+  }
+
   transition(M, L1_Replacement, I) {
     c_ownedReplacement;
     gg_deallocateL1CacheBlock;
@@ -1825,7 +1864,7 @@ machine(L1Cache, "Token protocol")
   }
 
   // someone unlocked during timeout
-  transition(M_W, Use_TimeoutNoStarvers, M) {
+  transition(M_W, {Use_TimeoutNoStarvers, Use_TimeoutNoStarvers_NoMig}, M) {
     s_deallocateTBE;
     jj_unsetUseTimer;
   }
@@ -2065,7 +2104,7 @@ machine(L1Cache, "Token protocol")
     k_popMandatoryQueue;
   }
 
-  transition(I_L, Store, IM_L) {
+  transition(I_L, {Store, Atomic}, IM_L) {
     ii_allocateL1DCacheBlock;
     i_allocateTBE;
     b_issueWriteRequest;
@@ -2076,7 +2115,7 @@ machine(L1Cache, "Token protocol")
 
   // Transitions from S_L
 
-  transition(S_L, Store, SM_L) {
+  transition(S_L, {Store, Atomic}, SM_L) {
     i_allocateTBE;
     b_issueWriteRequest;
     uu_profileMiss;
diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm
index 44ae479c7..7b49c075c 100644
--- a/src/mem/protocol/MOESI_hammer-cache.sm
+++ b/src/mem/protocol/MOESI_hammer-cache.sm
@@ -40,7 +40,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
   CacheMemory * L2cacheMemory,
   int cache_response_latency = 10,
   int issue_latency = 2,
-  int l2_cache_hit_latency = 10
+  int l2_cache_hit_latency = 10,
+  bool no_mig_atomic = true
 {
 
   // NETWORK BUFFERS
@@ -94,6 +95,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     // Requests
     Other_GETX,      desc="A GetX from another processor";
     Other_GETS,      desc="A GetS from another processor";
+    Other_GETS_No_Mig, desc="A GetS from another processor";
 
     // Responses
     Ack,             desc="Received an ack message";
@@ -122,6 +124,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     bool Dirty,              desc="Is the data dirty (different than memory)?";
     DataBlock DataBlk,       desc="data for the block";
     bool FromL2, default="false", desc="block just moved from L2";
+    bool AtomicAccessed, default="false", desc="block just moved from L2";
   }
 
   // TBE fields
@@ -280,7 +283,15 @@ machine(L1Cache, "AMD Hammer-like protocol")
         if (in_msg.Type == CoherenceRequestType:GETX) {
           trigger(Event:Other_GETX, in_msg.Address);
         } else if (in_msg.Type == CoherenceRequestType:GETS) {
-          trigger(Event:Other_GETS, in_msg.Address);
+          if (isCacheTagPresent(in_msg.Address)) {
+            if (getCacheEntry(in_msg.Address).AtomicAccessed && no_mig_atomic) {
+              trigger(Event:Other_GETS_No_Mig, in_msg.Address);
+            } else {
+              trigger(Event:Other_GETS, in_msg.Address);
+            }
+          } else {
+            trigger(Event:Other_GETS, in_msg.Address);
+          }
         } else if (in_msg.Type == CoherenceRequestType:WB_ACK) {
           trigger(Event:Writeback_Ack, in_msg.Address);
         } else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
@@ -538,12 +549,16 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") {
     DEBUG_EXPR(getCacheEntry(address).DataBlk);
+    peek(mandatoryQueue_in, CacheMsg) {
+      sequencer.writeCallback(address, 
+                              testAndClearLocalHit(address), 
+                              getCacheEntry(address).DataBlk);
 
-    sequencer.writeCallback(address, 
-                            testAndClearLocalHit(address), 
-                            getCacheEntry(address).DataBlk);
-
-    getCacheEntry(address).Dirty := true;
+      getCacheEntry(address).Dirty := true;
+      if (in_msg.Type == CacheRequestType:ATOMIC) {
+        getCacheEntry(address).AtomicAccessed := true;
+      }
+    }
   }
 
   action(sx_external_store_hit, "sx", desc="store required external msgs.") {
@@ -798,7 +813,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     zz_recycleMandatoryQueue;
   }
 
-  transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS}) {
+  transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
     // stall
   }
 
@@ -948,7 +963,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     rr_deallocateL2CacheBlock;
   }
 
-  transition(I, {Other_GETX, Other_GETS}) {
+  transition(I, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
     f_sendAck;
     l_popForwardQueue;
   }
@@ -975,7 +990,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(S, Other_GETS) {
+  transition(S, {Other_GETS, Other_GETS_No_Mig}) {
     ff_sendAckShared;
     l_popForwardQueue;
   }
@@ -1005,7 +1020,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(O, Other_GETS) {
+  transition(O, {Other_GETS, Other_GETS_No_Mig}) {
     ee_sendDataShared;
     l_popForwardQueue;
   }
@@ -1037,6 +1052,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
   
+  transition(MM, Other_GETS_No_Mig, O) {
+    ee_sendDataShared;
+    l_popForwardQueue;
+  }
+  
   // Transitions from Dirty Exclusive
   transition(M, {Load, Ifetch}) {
     h_load_hit;
@@ -1059,14 +1079,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(M, Other_GETS, O) {
+  transition(M, {Other_GETS, Other_GETS_No_Mig}, O) {
     ee_sendDataShared;
     l_popForwardQueue;
   }
 
   // Transitions from IM
 
-  transition(IM, {Other_GETX, Other_GETS}) {
+  transition(IM, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
     f_sendAck;
     l_popForwardQueue;
   }
@@ -1093,7 +1113,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   // Transitions from SM
-  transition(SM, Other_GETS) {
+  transition(SM, {Other_GETS, Other_GETS_No_Mig}) {
     ff_sendAckShared;
     l_popForwardQueue;
   }
@@ -1138,7 +1158,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(OM, Other_GETS) {
+  transition(OM, {Other_GETS, Other_GETS_No_Mig}) {
     ee_sendDataShared;
     l_popForwardQueue;
   }
@@ -1158,7 +1178,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   // Transitions from IS
 
-  transition(IS, {Other_GETX, Other_GETS}) {
+  transition(IS, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
     f_sendAck;
     l_popForwardQueue;
   }
@@ -1274,7 +1294,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition({OI, MI}, Other_GETS, OI) {
+  transition({OI, MI}, {Other_GETS, Other_GETS_No_Mig}, OI) {
     q_sendDataFromTBEToCache;
     l_popForwardQueue;
   }
@@ -1292,7 +1312,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   }
 
   // Transitions from II
-  transition(II, {Other_GETS, Other_GETX}, II) {
+  transition(II, {Other_GETS, Other_GETS_No_Mig, Other_GETX}, II) {
     f_sendAck;
     l_popForwardQueue;
   }
author	Brad Beckmann <Brad.Beckmann@amd.com>	2010-08-20 11:46:13 -0700
committer	Brad Beckmann <Brad.Beckmann@amd.com>	2010-08-20 11:46:13 -0700
commit	72044e3f5a5b9455e07180806793127be2014451 (patch)
tree	fe2e7f7f129a31dccf84ee2d0bfc93d9c76b7f07
parent	bcdd19df03ed047e4ce03dbf81bc40173cc3217b (diff)
download	gem5-72044e3f5a5b9455e07180806793127be2014451.tar.xz