From 0cede15d6c5213d83e4cd143014321b2ee7ec5eb Mon Sep 17 00:00:00 2001
From: Nilay Vaish
Date: Sat, 12 Feb 2011 11:41:20 -0600
Subject: Ruby: Reorder Cache Lookup in Protocol Files

The patch changes the order in which the L1 dcache and icache are looked up
when a request comes in. Earlier, if a request came in for an instruction
fetch, the dcache was looked up before the icache, to correctly handle
self-modifying code. But in the common case, the dcache is going to report a
miss and the subsequent icache lookup is going to report a hit. Given the
invariant that caches under the same controller keep track of disjoint sets
of cache blocks, we can move the icache lookup before the dcache lookup. In
case of a hit in the icache, the invariant tells us that the dcache would
have reported a miss. In case of a miss in the icache, we know that the
icache would have missed even if the dcache had been looked up first, and the
dcache check still happens, just after the icache lookup instead of before.
Effectively, we are doing the same thing as before, though in the common case
we expect a reduction in the number of lookups. This was empirically
confirmed for MOESI hammer: the ratio of lookups to access requests is now
about 1.1 to 1.
---
 src/mem/protocol/MESI_CMP_directory-L1cache.sm  | 35 ++++++++-------
 src/mem/protocol/MOESI_CMP_directory-L1cache.sm | 32 +++++++-------
 src/mem/protocol/MOESI_CMP_token-L1cache.sm     | 35 +++++++--------
 src/mem/protocol/MOESI_hammer-cache.sm          | 57 +++++++++++++------------
 4 files changed, 82 insertions(+), 77 deletions(-)

(limited to 'src/mem')

diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
index 8744a7122..4442cee41 100644
--- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
@@ -287,20 +287,21 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
       if (in_msg.Type == CacheRequestType:IFETCH) {
         // ** INSTRUCTION ACCESS ***
 
-        // Check to see if it is in the OTHER L1
-        Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
-        if (is_valid(L1Dcache_entry)) {
-          // The block is in the wrong L1, put the request on the queue to the shared L2
-          trigger(Event:L1_Replacement, in_msg.LineAddress,
-                  L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
-        }
-
         Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
         if (is_valid(L1Icache_entry)) {
           // The tag matches for the L1, so the L1 asks the L2 for it.
trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, L1_TBEs[in_msg.LineAddress]); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); + } + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, @@ -313,21 +314,23 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") } } } else { - // *** DATA ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, L1_TBEs[in_msg.LineAddress]); - } + // *** DATA ACCESS *** Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 ask the L2 for it trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, L1_TBEs[in_msg.LineAddress]); + } + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm index 4082f23c9..7f0ab62a8 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -338,14 +338,6 @@ machine(L1Cache, "Directory protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - // Check to see if it is in the OTHER L1 - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry, - TBEs[in_msg.LineAddress]); - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 asks the L2 for it. 
@@ -353,6 +345,14 @@ machine(L1Cache, "Directory protocol") in_msg.LineAddress, L1Icache_entry, TBEs[in_msg.LineAddress]); } else { + + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + // Check to see if it is in the OTHER L1 + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry, + TBEs[in_msg.LineAddress]); + } if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), @@ -369,14 +369,6 @@ machine(L1Cache, "Directory protocol") } else { // *** DATA ACCESS *** - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - // Check to see if it is in the OTHER L1 - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, TBEs[in_msg.LineAddress]); - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 ask the L2 for it @@ -384,6 +376,14 @@ machine(L1Cache, "Directory protocol") in_msg.LineAddress, L1Dcache_entry, TBEs[in_msg.LineAddress]); } else { + + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + // Check to see if it is in the OTHER L1 + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, TBEs[in_msg.LineAddress]); + } if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm index 00e9404c9..226f21374 100644 --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -647,20 +647,21 @@ machine(L1Cache, "Token protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Dcache_entry, tbe); - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 fetches the line. 
We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Dcache_entry, tbe); + } + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 trigger(mandatory_request_type_to_event(in_msg.Type), @@ -676,21 +677,21 @@ machine(L1Cache, "Token protocol") } else { // *** DATA ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, tbe); - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, tbe); + } + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 trigger(mandatory_request_type_to_event(in_msg.Type), diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index 78bc9e3e7..ab2a6acf4 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -377,26 +377,26 @@ machine(L1Cache, "AMD Hammer-like protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { - trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe); - } else { - trigger(Event:L2_Replacement, - L2cacheMemory.cacheProbe(in_msg.LineAddress), - getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), - TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); - } - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 fetches the line. 
We know it can't be in the L2 due to exclusion
          trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
                  L1Icache_entry, tbe);
        } else {
+          // Check to see if it is in the OTHER L1
+          Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
+          if (is_valid(L1Dcache_entry)) {
+            // The block is in the wrong L1, try to write it to the L2
+            if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
+              trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe);
+            } else {
+              trigger(Event:L2_Replacement,
+                      L2cacheMemory.cacheProbe(in_msg.LineAddress),
+                      getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)),
+                      TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]);
+            }
+          }
+
          if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
            // L1 does't have the line, but we have space for it in the L1
@@ -430,26 +430,27 @@ machine(L1Cache, "AMD Hammer-like protocol")
       } else {
         // *** DATA ACCESS ***
 
-        // Check to see if it is in the OTHER L1
-        Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
-        if (is_valid(L1Icache_entry)) {
-          // The block is in the wrong L1, try to write it to the L2
-          if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
-            trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe);
-          } else {
-            trigger(Event:L2_Replacement,
-                    L2cacheMemory.cacheProbe(in_msg.LineAddress),
-                    getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)),
-                    TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]);
-          }
-        }
-
         Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
         if (is_valid(L1Dcache_entry)) {
           // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion
           trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
                   L1Dcache_entry, tbe);
         } else {
+
+          // Check to see if it is in the OTHER L1
+          Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
+          if (is_valid(L1Icache_entry)) {
+            // The block is in the wrong L1, try to write it to the L2
+            if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
+              trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe);
+            } else {
+              trigger(Event:L2_Replacement,
+                      L2cacheMemory.cacheProbe(in_msg.LineAddress),
+                      getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)),
+                      TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]);
+            }
+          }
+
           if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
             // L1 does't have the line, but we have space for it in the L1
             Entry L2cache_entry := getL2CacheEntry(in_msg.LineAddress);
-- cgit v1.2.3


From 343e94a257baa94575adf0d0def18ffe8da0c4f8 Mon Sep 17 00:00:00 2001
From: Nilay Vaish
Date: Mon, 14 Feb 2011 16:14:54 -0600
Subject: Ruby: Improve PerfectSwitch's wakeup function

Currently the wakeup function for the PerfectSwitch contains three nested
loops:

  loop on number of virtual networks
    loop on number of incoming links
      loop till all messages for this (link, network) pair have been routed

With an 8 processor mesh network and the Hammer protocol, about 11-12% of the
total execution time was observed to have been spent in this function, the
highest among all functions. It was found that the innermost loop is executed
about 45 times per invocation of the wakeup function, even though each
invocation processes just about one message. The patch tries to do away with
the redundant executions of the innermost loop. Counters have been added for
each virtual network that record the number of messages that need to be
routed for that virtual network, as sketched below.
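As an illustration, here is a minimal, self-contained C++ sketch of the
counting scheme (toy types and names, not the actual PerfectSwitch or
MessageBuffer classes): enqueueing a message bumps the counter for its
virtual network, and wakeup() skips the per-port scan for any virtual network
whose counter is zero. In the patch itself, MessageBuffer::enqueue performs
the bump by calling the consumer's storeEventInfo() with its vnet id.

    // Toy model of a switch with per-vnet pending-message counters.
    // Illustrative only; names and types are simplified stand-ins.
    #include <cstddef>
    #include <deque>
    #include <string>
    #include <utility>
    #include <vector>

    struct ToySwitch {
        // pending[vnet] counts messages waiting across all input ports of vnet
        std::vector<std::size_t> pending;
        // in[port][vnet] is an input queue of (toy) messages
        std::vector<std::vector<std::deque<std::string>>> in;

        ToySwitch(std::size_t ports, std::size_t vnets)
            : pending(vnets, 0),
              in(ports, std::vector<std::deque<std::string>>(vnets)) {}

        // Called when a message arrives on (port, vnet); plays the role of the
        // buffer notifying its consumer that vnet now has one more message.
        void enqueue(std::size_t port, std::size_t vnet, std::string msg) {
            in[port][vnet].push_back(std::move(msg));
            ++pending[vnet];
        }

        // Route everything that is ready; the inner port loop runs only for
        // virtual networks that actually have pending messages.
        void wakeup() {
            for (std::size_t vnet = 0; vnet < pending.size(); ++vnet) {
                if (pending[vnet] == 0)
                    continue;               // skip empty vnets entirely
                for (std::size_t port = 0; port < in.size(); ++port) {
                    while (!in[port][vnet].empty()) {
                        // ... routing decision would go here ...
                        in[port][vnet].pop_front();
                        --pending[vnet];
                    }
                }
            }
        }
    };

    int main() {
        ToySwitch sw(/*ports=*/4, /*vnets=*/3);
        sw.enqueue(2, 1, "GETS 0x40");
        sw.wakeup();   // only vnet 1's port loop executes
        return 0;
    }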
The inner loops are only executed when the number of messages for that particular virtual network > 0. This does away with almost 80% of the executions of the innermost loop. The function now consumes about 5-6% of the total execution time. --- src/mem/ruby/buffers/MessageBuffer.cc | 3 + src/mem/ruby/buffers/MessageBuffer.hh | 6 + src/mem/ruby/common/Consumer.hh | 1 + src/mem/ruby/network/simple/PerfectSwitch.cc | 281 +++++++++++++------------ src/mem/ruby/network/simple/PerfectSwitch.hh | 2 + src/mem/ruby/slicc_interface/Message.hh | 2 + src/mem/ruby/slicc_interface/NetworkMessage.hh | 7 + 7 files changed, 170 insertions(+), 132 deletions(-) (limited to 'src/mem') diff --git a/src/mem/ruby/buffers/MessageBuffer.cc b/src/mem/ruby/buffers/MessageBuffer.cc index f6b79c580..225595005 100644 --- a/src/mem/ruby/buffers/MessageBuffer.cc +++ b/src/mem/ruby/buffers/MessageBuffer.cc @@ -58,6 +58,8 @@ MessageBuffer::MessageBuffer(const string &name) m_name = name; m_stall_msg_map.clear(); + m_input_link_id = 0; + m_vnet_id = 0; } int @@ -228,6 +230,7 @@ MessageBuffer::enqueue(MsgPtr message, Time delta) // Schedule the wakeup if (m_consumer_ptr != NULL) { g_eventQueue_ptr->scheduleEventAbsolute(m_consumer_ptr, arrival_time); + m_consumer_ptr->storeEventInfo(m_vnet_id); } else { panic("No consumer: %s name: %s\n", *this, m_name); } diff --git a/src/mem/ruby/buffers/MessageBuffer.hh b/src/mem/ruby/buffers/MessageBuffer.hh index 62cc65670..88df5b788 100644 --- a/src/mem/ruby/buffers/MessageBuffer.hh +++ b/src/mem/ruby/buffers/MessageBuffer.hh @@ -142,6 +142,9 @@ class MessageBuffer void printStats(std::ostream& out); void clearStats() { m_not_avail_count = 0; m_msg_counter = 0; } + void setIncomingLink(int link_id) { m_input_link_id = link_id; } + void setVnet(int net) { m_vnet_id = net; } + private: //added by SS int m_recycle_latency; @@ -184,6 +187,9 @@ class MessageBuffer bool m_ordering_set; bool m_randomization; Time m_last_arrival_time; + + int m_input_link_id; + int m_vnet_id; }; inline std::ostream& diff --git a/src/mem/ruby/common/Consumer.hh b/src/mem/ruby/common/Consumer.hh index c1f8bc42e..a119abb39 100644 --- a/src/mem/ruby/common/Consumer.hh +++ b/src/mem/ruby/common/Consumer.hh @@ -67,6 +67,7 @@ class Consumer virtual void wakeup() = 0; virtual void print(std::ostream& out) const = 0; + virtual void storeEventInfo(int info) {} const Time& getLastScheduledWakeup() const diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc index 7229c724f..5c461c63f 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.cc +++ b/src/mem/ruby/network/simple/PerfectSwitch.cc @@ -54,6 +54,11 @@ PerfectSwitch::PerfectSwitch(SwitchID sid, SimpleNetwork* network_ptr) m_round_robin_start = 0; m_network_ptr = network_ptr; m_wakeups_wo_switch = 0; + + for(int i = 0;i < m_virtual_networks;++i) + { + m_pending_message_count.push_back(0); + } } void @@ -62,12 +67,15 @@ PerfectSwitch::addInPort(const vector& in) assert(in.size() == m_virtual_networks); NodeID port = m_in.size(); m_in.push_back(in); + for (int j = 0; j < m_virtual_networks; j++) { m_in[port][j]->setConsumer(this); string desc = csprintf("[Queue from port %s %s %s to PerfectSwitch]", NodeIDToString(m_switch_id), NodeIDToString(port), NodeIDToString(j)); m_in[port][j]->setDescription(desc); + m_in[port][j]->setIncomingLink(port); + m_in[port][j]->setVnet(j); } } @@ -154,160 +162,169 @@ PerfectSwitch::wakeup() m_round_robin_start = 0; } - // for all input ports, use round robin scheduling - for (int 
counter = 0; counter < m_in.size(); counter++) { - // Round robin scheduling - incoming++; - if (incoming >= m_in.size()) { - incoming = 0; - } + if(m_pending_message_count[vnet] > 0) { + // for all input ports, use round robin scheduling + for (int counter = 0; counter < m_in.size(); counter++) { + // Round robin scheduling + incoming++; + if (incoming >= m_in.size()) { + incoming = 0; + } - // temporary vectors to store the routing results - vector output_links; - vector output_link_destinations; - - // Is there a message waiting? - while (m_in[incoming][vnet]->isReady()) { - DPRINTF(RubyNetwork, "incoming: %d\n", incoming); - - // Peek at message - msg_ptr = m_in[incoming][vnet]->peekMsgPtr(); - net_msg_ptr = safe_cast(msg_ptr.get()); - DPRINTF(RubyNetwork, "Message: %s\n", (*net_msg_ptr)); - - output_links.clear(); - output_link_destinations.clear(); - NetDest msg_dsts = - net_msg_ptr->getInternalDestination(); - - // Unfortunately, the token-protocol sends some - // zero-destination messages, so this assert isn't valid - // assert(msg_dsts.count() > 0); - - assert(m_link_order.size() == m_routing_table.size()); - assert(m_link_order.size() == m_out.size()); - - if (m_network_ptr->getAdaptiveRouting()) { - if (m_network_ptr->isVNetOrdered(vnet)) { - // Don't adaptively route - for (int out = 0; out < m_out.size(); out++) { - m_link_order[out].m_link = out; - m_link_order[out].m_value = 0; - } - } else { - // Find how clogged each link is - for (int out = 0; out < m_out.size(); out++) { - int out_queue_length = 0; - for (int v = 0; v < m_virtual_networks; v++) { - out_queue_length += m_out[out][v]->getSize(); + // temporary vectors to store the routing results + vector output_links; + vector output_link_destinations; + + // Is there a message waiting? 
+ while (m_in[incoming][vnet]->isReady()) { + DPRINTF(RubyNetwork, "incoming: %d\n", incoming); + + // Peek at message + msg_ptr = m_in[incoming][vnet]->peekMsgPtr(); + net_msg_ptr = safe_cast(msg_ptr.get()); + DPRINTF(RubyNetwork, "Message: %s\n", (*net_msg_ptr)); + + output_links.clear(); + output_link_destinations.clear(); + NetDest msg_dsts = + net_msg_ptr->getInternalDestination(); + + // Unfortunately, the token-protocol sends some + // zero-destination messages, so this assert isn't valid + // assert(msg_dsts.count() > 0); + + assert(m_link_order.size() == m_routing_table.size()); + assert(m_link_order.size() == m_out.size()); + + if (m_network_ptr->getAdaptiveRouting()) { + if (m_network_ptr->isVNetOrdered(vnet)) { + // Don't adaptively route + for (int out = 0; out < m_out.size(); out++) { + m_link_order[out].m_link = out; + m_link_order[out].m_value = 0; + } + } else { + // Find how clogged each link is + for (int out = 0; out < m_out.size(); out++) { + int out_queue_length = 0; + for (int v = 0; v < m_virtual_networks; v++) { + out_queue_length += m_out[out][v]->getSize(); + } + int value = + (out_queue_length << 8) | (random() & 0xff); + m_link_order[out].m_link = out; + m_link_order[out].m_value = value; } - int value = - (out_queue_length << 8) | (random() & 0xff); - m_link_order[out].m_link = out; - m_link_order[out].m_value = value; + + // Look at the most empty link first + sort(m_link_order.begin(), m_link_order.end()); } + } - // Look at the most empty link first - sort(m_link_order.begin(), m_link_order.end()); + for (int i = 0; i < m_routing_table.size(); i++) { + // pick the next link to look at + int link = m_link_order[i].m_link; + NetDest dst = m_routing_table[link]; + DPRINTF(RubyNetwork, "dst: %s\n", dst); + + if (!msg_dsts.intersectionIsNotEmpty(dst)) + continue; + + // Remember what link we're using + output_links.push_back(link); + + // Need to remember which destinations need this + // message in another vector. This Set is the + // intersection of the routing_table entry and the + // current destination set. The intersection must + // not be empty, since we are inside "if" + output_link_destinations.push_back(msg_dsts.AND(dst)); + + // Next, we update the msg_destination not to + // include those nodes that were already handled + // by this link + msg_dsts.removeNetDest(dst); } - } - for (int i = 0; i < m_routing_table.size(); i++) { - // pick the next link to look at - int link = m_link_order[i].m_link; - NetDest dst = m_routing_table[link]; - DPRINTF(RubyNetwork, "dst: %s\n", dst); - - if (!msg_dsts.intersectionIsNotEmpty(dst)) - continue; - - // Remember what link we're using - output_links.push_back(link); - - // Need to remember which destinations need this - // message in another vector. This Set is the - // intersection of the routing_table entry and the - // current destination set. 
The intersection must - // not be empty, since we are inside "if" - output_link_destinations.push_back(msg_dsts.AND(dst)); - - // Next, we update the msg_destination not to - // include those nodes that were already handled - // by this link - msg_dsts.removeNetDest(dst); - } + assert(msg_dsts.count() == 0); + //assert(output_links.size() > 0); + + // Check for resources - for all outgoing queues + bool enough = true; + for (int i = 0; i < output_links.size(); i++) { + int outgoing = output_links[i]; + if (!m_out[outgoing][vnet]->areNSlotsAvailable(1)) + enough = false; + DPRINTF(RubyNetwork, "Checking if node is blocked\n" + "outgoing: %d, vnet: %d, enough: %d\n", + outgoing, vnet, enough); + } - assert(msg_dsts.count() == 0); - //assert(output_links.size() > 0); - - // Check for resources - for all outgoing queues - bool enough = true; - for (int i = 0; i < output_links.size(); i++) { - int outgoing = output_links[i]; - if (!m_out[outgoing][vnet]->areNSlotsAvailable(1)) - enough = false; - DPRINTF(RubyNetwork, "Checking if node is blocked\n" - "outgoing: %d, vnet: %d, enough: %d\n", - outgoing, vnet, enough); - } + // There were not enough resources + if (!enough) { + g_eventQueue_ptr->scheduleEvent(this, 1); + DPRINTF(RubyNetwork, "Can't deliver message since a node " + "is blocked\n" + "Message: %s\n", (*net_msg_ptr)); + break; // go to next incoming port + } - // There were not enough resources - if (!enough) { - g_eventQueue_ptr->scheduleEvent(this, 1); - DPRINTF(RubyNetwork, "Can't deliver message since a node " - "is blocked\n" - "Message: %s\n", (*net_msg_ptr)); - break; // go to next incoming port - } + MsgPtr unmodified_msg_ptr; - MsgPtr unmodified_msg_ptr; + if (output_links.size() > 1) { + // If we are sending this message down more than + // one link (size>1), we need to make a copy of + // the message so each branch can have a different + // internal destination we need to create an + // unmodified MsgPtr because the MessageBuffer + // enqueue func will modify the message - if (output_links.size() > 1) { - // If we are sending this message down more than - // one link (size>1), we need to make a copy of - // the message so each branch can have a different - // internal destination we need to create an - // unmodified MsgPtr because the MessageBuffer - // enqueue func will modify the message + // This magic line creates a private copy of the + // message + unmodified_msg_ptr = msg_ptr->clone(); + } - // This magic line creates a private copy of the - // message - unmodified_msg_ptr = msg_ptr->clone(); - } + // Enqueue it - for all outgoing queues + for (int i=0; i 0) { + // create a private copy of the unmodified + // message + msg_ptr = unmodified_msg_ptr->clone(); + } - if (i > 0) { - // create a private copy of the unmodified - // message - msg_ptr = unmodified_msg_ptr->clone(); - } + // Change the internal destination set of the + // message so it knows which destinations this + // link is responsible for. + net_msg_ptr = safe_cast(msg_ptr.get()); + net_msg_ptr->getInternalDestination() = + output_link_destinations[i]; - // Change the internal destination set of the - // message so it knows which destinations this - // link is responsible for. 
- net_msg_ptr = safe_cast(msg_ptr.get()); - net_msg_ptr->getInternalDestination() = - output_link_destinations[i]; + // Enqeue msg + DPRINTF(RubyNetwork, "Switch: %d enqueuing net msg from " + "inport[%d][%d] to outport [%d][%d] time: %lld.\n", + m_switch_id, incoming, vnet, outgoing, vnet, + g_eventQueue_ptr->getTime()); - // Enqeue msg - DPRINTF(RubyNetwork, "Switch: %d enqueuing net msg from " - "inport[%d][%d] to outport [%d][%d] time: %lld.\n", - m_switch_id, incoming, vnet, outgoing, vnet, - g_eventQueue_ptr->getTime()); + m_out[outgoing][vnet]->enqueue(msg_ptr); + } - m_out[outgoing][vnet]->enqueue(msg_ptr); + // Dequeue msg + m_in[incoming][vnet]->pop(); + m_pending_message_count[vnet]--; } - - // Dequeue msg - m_in[incoming][vnet]->pop(); } } } } +void +PerfectSwitch::storeEventInfo(int info) +{ + m_pending_message_count[info]++; +} + void PerfectSwitch::printStats(std::ostream& out) const { diff --git a/src/mem/ruby/network/simple/PerfectSwitch.hh b/src/mem/ruby/network/simple/PerfectSwitch.hh index a7e577df0..cd0219fd9 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.hh +++ b/src/mem/ruby/network/simple/PerfectSwitch.hh @@ -69,6 +69,7 @@ class PerfectSwitch : public Consumer int getOutLinks() const { return m_out.size(); } void wakeup(); + void storeEventInfo(int info); void printStats(std::ostream& out) const; void clearStats(); @@ -92,6 +93,7 @@ class PerfectSwitch : public Consumer int m_round_robin_start; int m_wakeups_wo_switch; SimpleNetwork* m_network_ptr; + std::vector m_pending_message_count; }; inline std::ostream& diff --git a/src/mem/ruby/slicc_interface/Message.hh b/src/mem/ruby/slicc_interface/Message.hh index ff94fdd40..7fcfabe9c 100644 --- a/src/mem/ruby/slicc_interface/Message.hh +++ b/src/mem/ruby/slicc_interface/Message.hh @@ -57,6 +57,8 @@ class Message : public RefCounted virtual Message* clone() const = 0; virtual void print(std::ostream& out) const = 0; + virtual void setIncomingLink(int) {} + virtual void setVnet(int) {} void setDelayedCycles(const int& cycles) { m_DelayedCycles = cycles; } const int& getDelayedCycles() const {return m_DelayedCycles;} diff --git a/src/mem/ruby/slicc_interface/NetworkMessage.hh b/src/mem/ruby/slicc_interface/NetworkMessage.hh index 082481e05..a8f9c625b 100644 --- a/src/mem/ruby/slicc_interface/NetworkMessage.hh +++ b/src/mem/ruby/slicc_interface/NetworkMessage.hh @@ -82,9 +82,16 @@ class NetworkMessage : public Message virtual void print(std::ostream& out) const = 0; + int getIncomingLink() const { return incoming_link; } + void setIncomingLink(int link) { incoming_link = link; } + int getVnet() const { return vnet; } + void setVnet(int net) { vnet = net; } + private: NetDest m_internal_dest; bool m_internal_dest_valid; + int incoming_link; + int vnet; }; inline std::ostream& -- cgit v1.2.3 From 293ccb703738093db84ae7ec78434433560fe00b Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Sat, 19 Feb 2011 17:32:00 -0600 Subject: Ruby: clean MOESI CMP directory protocol The L1 cache controller file contains references to foo and goo queues, which are not in use at all. These have been removed. 
--- src/mem/protocol/MOESI_CMP_directory-L1cache.sm | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'src/mem') diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm index 7f0ab62a8..bb5b35dc1 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -44,7 +44,6 @@ machine(L1Cache, "Directory protocol") // From this node's L1 cache TO the network // a local L1 -> this L2 bank, currently ordered with directory forwarded requests MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false"; - MessageBuffer foo, network="To", virtual_network="1", ordered="false"; // a local L1 -> this L2 bank MessageBuffer responseFromL1Cache, network="To", virtual_network="2", ordered="false"; // MessageBuffer writebackFromL1Cache, network="To", virtual_network="3", ordered="false"; @@ -53,7 +52,6 @@ machine(L1Cache, "Directory protocol") // To this node's L1 cache FROM the network // a L2 bank -> this L1 MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false"; - MessageBuffer goo, network="From", virtual_network="1", ordered="false"; // a L2 bank -> this L1 MessageBuffer responseToL1Cache, network="From", virtual_network="2", ordered="false"; @@ -229,7 +227,6 @@ machine(L1Cache, "Directory protocol") out_port(requestNetwork_out, RequestMsg, requestFromL1Cache); out_port(responseNetwork_out, ResponseMsg, responseFromL1Cache); out_port(triggerQueue_out, TriggerMsg, triggerQueue); - out_port(foo_out, ResponseMsg, foo); // ** IN_PORTS ** @@ -242,15 +239,6 @@ machine(L1Cache, "Directory protocol") } } - - in_port(goo_in, RequestMsg, goo) { - if (goo_in.isReady()) { - peek(goo_in, RequestMsg) { - assert(false); - } - } - } - // Trigger Queue in_port(triggerQueue_in, TriggerMsg, triggerQueue) { if (triggerQueue_in.isReady()) { -- cgit v1.2.3 From 77eed184f529c4ccbef59ad2018d18ff3fbb54af Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Sat, 19 Feb 2011 17:32:43 -0600 Subject: Ruby: Machine Type missing in MOESI CMP directory protocol In certain actions of the L1 cache controller, while creating an outgoing message, the machine type was not being set. This results in a segmentation fault when trace is collected. Joseph Pusudesris provided his patch for fixing this issue. 
--- src/mem/protocol/MOESI_CMP_directory-L1cache.sm | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/mem') diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm index bb5b35dc1..e590c952a 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -399,6 +399,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; @@ -443,6 +444,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceRequestType:PUTO; out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; @@ -455,6 +457,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceRequestType:PUTS; out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; @@ -469,6 +472,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; @@ -484,6 +488,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := cache_entry.DataBlk; // out_msg.Dirty := cache_entry.Dirty; @@ -502,6 +507,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; @@ -580,6 +586,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:UNBLOCK; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; @@ -678,6 +685,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := tbe.DataBlk; // out_msg.Dirty := tbe.Dirty; @@ -691,6 +699,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, 
l2_select_low_bit, l2_select_num_bits)); out_msg.DataBlk := tbe.DataBlk; @@ -711,6 +720,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; @@ -723,6 +733,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.DataBlk := tbe.DataBlk; -- cgit v1.2.3
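
For illustration of the first patch in this series (the reordered L1
lookups), a minimal standalone sketch with hypothetical C++ types rather than
the SLICC controllers: the cache the request targets is probed first, and the
other cache only on a miss. Under the disjoint-blocks invariant this is
equivalent to the old order but needs roughly one lookup per access in the
common case.

    #include <cstdint>
    #include <unordered_set>

    using Addr = std::uint64_t;

    struct L1Pair {
        std::unordered_set<Addr> icache;   // blocks currently in the I-cache
        std::unordered_set<Addr> dcache;   // blocks currently in the D-cache
        unsigned lookups = 0;

        bool probe(const std::unordered_set<Addr> &c, Addr a) {
            ++lookups;
            return c.count(a) != 0;
        }

        // Instruction fetch, new order: icache first, dcache only on a miss.
        void ifetch(Addr line) {
            if (probe(icache, line)) {
                // Hit in the right cache: the dcache cannot also hold the block.
                return;
            }
            if (probe(dcache, line)) {
                // Block is in the wrong L1: evict it (L1_Replacement) first.
                dcache.erase(line);
            }
            // ... allocate in the icache / ask the next level, as before ...
        }
    };

    int main() {
        L1Pair l1;
        l1.icache.insert(0x40);
        l1.ifetch(0x40);      // common case: 1 lookup instead of 2
        return l1.lookups == 1 ? 0 : 1;
    }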