From 877be2009c0871effb3494b52ce5221d58a594d6 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:32 -0800 Subject: ruby: Changes necessary to get the hammer protocol to work in GEM5 --- src/mem/protocol/MOESI_hammer-cache.sm | 178 +++++++++++++++++++-------------- 1 file changed, 103 insertions(+), 75 deletions(-) (limited to 'src/mem/protocol/MOESI_hammer-cache.sm') diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index d244a9b93..6fb5091af 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -1,5 +1,6 @@ /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 2009 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,9 +25,27 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * AMD's contributions to the MOESI hammer protocol do not constitute an + * endorsement of its similarity to any AMD products. + * + * Authors: Milo Martin + * Brad Beckmann */ -machine(L1Cache, "AMD Hammer-like protocol") { +machine(L1Cache, "AMD Hammer-like protocol") +: int cache_response_latency, + int issue_latency +{ + + // NETWORK BUFFERS + MessageBuffer requestFromCache, network="To", virtual_network="3", ordered="false"; + MessageBuffer responseFromCache, network="To", virtual_network="1", ordered="false"; + MessageBuffer unblockFromCache, network="To", virtual_network="0", ordered="false"; + + MessageBuffer forwardToCache, network="From", virtual_network="2", ordered="false"; + MessageBuffer responseToCache, network="From", virtual_network="1", ordered="false"; + // STATES enumeration(State, desc="Cache states", default="L1Cache_State_I") { @@ -82,14 +101,16 @@ machine(L1Cache, "AMD Hammer-like protocol") { // TYPES + // STRUCTURE DEFINITIONS + + MessageBuffer mandatoryQueue, ordered="false"; + Sequencer sequencer, factory='RubySystem::getSequencer(m_cfg["sequencer"])'; + // CacheEntry - structure(Entry, desc="...") { - Address Address, desc="Address of this block, required by CacheMemory"; - Time LastRef, desc="Last time this block was referenced, required by CacheMemory"; - AccessPermission Permission, desc="Access permission for this block, required by CacheMemory"; - DataBlock DataBlk, desc="data for the block, required by CacheMemory"; + structure(Entry, desc="...", interface="AbstractCacheEntry") { State CacheState, desc="cache state"; bool Dirty, desc="Is the data dirty (different than memory)?"; + DataBlock DataBlk, desc="data for the block"; } // TBE fields @@ -101,27 +122,28 @@ machine(L1Cache, "AMD Hammer-like protocol") { bool Sharers, desc="On a GetS, did we find any other sharers in the system"; } - external_type(NewCacheMemory) { + external_type(CacheMemory) { bool cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); bool isTagPresent(Address); + void profileMiss(CacheMsg); } - external_type(NewTBETable) { + external_type(TBETable) { TBE lookup(Address); void allocate(Address); void deallocate(Address); bool isPresent(Address); } - NewTBETable TBEs, template_hack=""; - NewCacheMemory L1IcacheMemory, template_hack="", 
constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,"L1I"'; - NewCacheMemory L1DcacheMemory, template_hack="", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,"L1D"'; - NewCacheMemory L2cacheMemory, template_hack="", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,"L2"'; + TBETable TBEs, template_hack=""; + CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])'; + CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])'; + CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])'; Entry getCacheEntry(Address addr), return_by_ref="yes" { if (L2cacheMemory.isTagPresent(addr)) { @@ -284,36 +306,36 @@ machine(L1Cache, "AMD Hammer-like protocol") { // ** INSTRUCTION ACCESS *** // Check to see if it is in the OTHER L1 - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.Address)) { - trigger(Event:L1_to_L2, in_msg.Address); + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress); } else { - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.LineAddress)); } } - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1IcacheMemory.cacheAvail(in_msg.Address)) { + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - if (L2cacheMemory.isTagPresent(in_msg.Address)) { + if (L2cacheMemory.isTagPresent(in_msg.LineAddress)) { // L2 has it (maybe not with the right permissions) - trigger(Event:L2_to_L1I, in_msg.Address); + trigger(Event:L2_to_L1I, in_msg.LineAddress); } else { // We have room, the L2 doesn't have it, so the L1 fetches the line - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } } else { // No room in the L1, so we need to make room - if (L2cacheMemory.cacheAvail(L1IcacheMemory.cacheProbe(in_msg.Address))) { + if (L2cacheMemory.cacheAvail(L1IcacheMemory.cacheProbe(in_msg.LineAddress))) { // The L2 has room, so we move the line from the L1 to the L2 - trigger(Event:L1_to_L2, L1IcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_to_L2, L1IcacheMemory.cacheProbe(in_msg.LineAddress)); } else { // The L2 does not have room, so we replace a line from the L2 - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1IcacheMemory.cacheProbe(in_msg.Address))); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1IcacheMemory.cacheProbe(in_msg.LineAddress))); } } } @@ -321,36 +343,36 @@ machine(L1Cache, "AMD Hammer-like protocol") { // *** DATA ACCESS *** // Check to see if it is in the OTHER L1 - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.Address)) { - trigger(Event:L1_to_L2, in_msg.Address); + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress); 
} else { - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.LineAddress)); } } - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1DcacheMemory.cacheAvail(in_msg.Address)) { + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - if (L2cacheMemory.isTagPresent(in_msg.Address)) { + if (L2cacheMemory.isTagPresent(in_msg.LineAddress)) { // L2 has it (maybe not with the right permissions) - trigger(Event:L2_to_L1D, in_msg.Address); + trigger(Event:L2_to_L1D, in_msg.LineAddress); } else { // We have room, the L2 doesn't have it, so the L1 fetches the line - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } } else { // No room in the L1, so we need to make room - if (L2cacheMemory.cacheAvail(L1DcacheMemory.cacheProbe(in_msg.Address))) { + if (L2cacheMemory.cacheAvail(L1DcacheMemory.cacheProbe(in_msg.LineAddress))) { // The L2 has room, so we move the line from the L1 to the L2 - trigger(Event:L1_to_L2, L1DcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_to_L2, L1DcacheMemory.cacheProbe(in_msg.LineAddress)); } else { // The L2 does not have room, so we replace a line from the L2 - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1DcacheMemory.cacheProbe(in_msg.Address))); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1DcacheMemory.cacheProbe(in_msg.LineAddress))); } } } @@ -362,33 +384,33 @@ machine(L1Cache, "AMD Hammer-like protocol") { // ACTIONS action(a_issueGETS, "a", desc="Issue GETS") { - enqueue(requestNetwork_out, RequestMsg, latency="ISSUE_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; - out_msg.Requestor := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Request_Control; - TBEs[address].NumPendingMsgs := numberOfNodes(); // One from each other processor (n-1) plus the memory (+1) + TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); // One from each other cache (n-1) plus the memory (+1) } } action(b_issueGETX, "b", desc="Issue GETX") { - enqueue(requestNetwork_out, RequestMsg, latency="ISSUE_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETX; - out_msg.Requestor := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Request_Control; - TBEs[address].NumPendingMsgs := numberOfNodes(); // One from each other processor (n-1) plus the memory (+1) + TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); // One from each other cache (n-1) plus the memory (+1) } } action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - 
enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -399,21 +421,21 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(d_issuePUT, "d", desc="Issue PUT") { - enqueue(requestNetwork_out, RequestMsg, latency="CACHE_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:PUT; - out_msg.Requestor := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } action(e_sendData, "e", desc="Send data from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -425,10 +447,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(ee_sendDataShared, "\e", desc="Send data from cache to requestor, keep a shared copy") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -440,10 +462,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(f_sendAck, "f", desc="Send ack from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.Acks := 1; out_msg.MessageSize := MessageSizeType:Response_Control; @@ -453,10 +475,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(ff_sendAckShared, "\f", desc="Send shared ack from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK_SHARED; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.Acks := 1; out_msg.MessageSize := MessageSizeType:Response_Control; @@ -465,11 +487,11 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(g_sendUnblock, "g", desc="Send unblock to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, 
latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:UNBLOCK; - out_msg.Sender := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Unblock_Control; } } @@ -541,10 +563,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := TBEs[address].DataBlk; out_msg.Dirty := TBEs[address].Dirty; @@ -555,10 +577,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(qq_sendDataFromTBEToMemory, "\q", desc="Send data from TBE to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; - out_msg.Sender := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.Dirty := TBEs[address].Dirty; if (TBEs[address].Dirty) { out_msg.Type := CoherenceResponseType:WB_DIRTY; @@ -583,10 +605,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(t_sendExclusiveDataFromTBEToMemory, "t", desc="Send exclusive data from TBE to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; - out_msg.Sender := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.DataBlk := TBEs[address].DataBlk; out_msg.Dirty := TBEs[address].Dirty; if (TBEs[address].Dirty) { @@ -628,18 +650,18 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") { if (L1DcacheMemory.isTagPresent(address) == false) { - L1DcacheMemory.allocate(address); + L1DcacheMemory.allocate(address, new Entry); } } action(jj_allocateL1ICacheBlock, "\j", desc="Set L1 I-cache tag equal to tag of block B.") { if (L1IcacheMemory.isTagPresent(address) == false) { - L1IcacheMemory.allocate(address); + L1IcacheMemory.allocate(address, new Entry); } } action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") { - L2cacheMemory.allocate(address); + L2cacheMemory.allocate(address, new Entry); } action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") { @@ -664,7 +686,13 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(uu_profileMiss, "\u", desc="Profile the demand miss") { peek(mandatoryQueue_in, CacheMsg) { - profile_miss(in_msg, id); + if (L1IcacheMemory.isTagPresent(address)) { + L1IcacheMemory.profileMiss(in_msg); + } else if (L1DcacheMemory.isTagPresent(address)) { + L1DcacheMemory.profileMiss(in_msg); + } else { + L2cacheMemory.profileMiss(in_msg); + } } } -- cgit v1.2.3
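
The mandatory-queue in_port above enforces L1/L2 exclusion: a block lives in exactly one of the L1 I-cache, the L1 D-cache, or the unified L2, so a demand access may first have to demote a line to the L2 (L1_to_L2), pull one up (L2_to_L1D / L2_to_L1I), or evict from the L2 (L2_Replacement) before the request itself can fire. A minimal Python sketch of the data-access arm of that decision, using a made-up CacheModel class that only stands in for Ruby's CacheMemory in this illustration:

    class CacheModel:
        """Toy tag store; a stand-in for Ruby's CacheMemory in this sketch only."""
        def __init__(self, name, capacity):
            self.name = name
            self.capacity = capacity
            self.tags = set()

        def is_tag_present(self, addr):
            return addr in self.tags

        def cache_avail(self, addr):
            # Simplified fully-associative check; the real CacheMemory checks the set.
            return len(self.tags) < self.capacity

        def cache_probe(self, addr):
            # Arbitrary victim; Ruby consults the replacement policy here.
            return next(iter(self.tags))

    def next_data_event(addr, l1i, l1d, l2):
        """Simplified mirror of the *** DATA ACCESS *** branch of the mandatory
        queue: return the one event this sketch would trigger next; the demand
        request is retried after that transition completes."""
        if l1i.is_tag_present(addr):              # block sits in the wrong L1
            if l2.cache_avail(addr):
                return ("L1_to_L2", addr)
            return ("L2_Replacement", l2.cache_probe(addr))
        if l1d.is_tag_present(addr):
            return ("demand_access", addr)        # L1D hit: mandatory_request_type_to_event
        if l1d.cache_avail(addr):
            if l2.is_tag_present(addr):
                return ("L2_to_L1D", addr)        # move the line up from the L2
            return ("demand_access", addr)        # fetch straight into the free L1D slot
        victim = l1d.cache_probe(addr)            # L1D is full: make room first
        if l2.cache_avail(victim):
            return ("L1_to_L2", victim)           # demote the victim to the L2
        return ("L2_Replacement", l2.cache_probe(victim))

    l1i = CacheModel("L1I", 2)
    l1d = CacheModel("L1D", 2)
    l2 = CacheModel("L2", 4)
    l1d.tags.update({0x40, 0x80})                 # L1D full, L2 empty
    print(next_data_event(0xC0, l1i, l1d, l2))    # -> ('L1_to_L2', <victim addr>)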
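
The GETS/GETX actions seed TBEs[address].NumPendingMsgs with getNumberOfLastLevelCaches() because, in this broadcast protocol, the requester waits for a response from every other cache (n - 1) plus one from memory (+1) before the request can complete, and each ACK/ACK_SHARED sent by f_sendAck / ff_sendAckShared carries Acks := 1. A toy countdown of that accounting (plain Python, no gem5 calls; it simplifies by counting every response as one):

    def expected_responses(num_last_level_caches):
        # (n - 1) peer caches + 1 memory response == n, the value written into
        # TBEs[address].NumPendingMsgs by a_issueGETS / b_issueGETX.
        return num_last_level_caches

    pending = expected_responses(4)      # e.g. a 4-cache Hammer system
    for sender in ("cache1", "cache2", "cache3", "memory"):
        pending -= 1                     # one ack or data response per sender
        print(sender, "->", pending, "still pending")
    assert pending == 0                  # all responses in: the request completes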