author | Derek Hower <drh5@cs.wisc.edu> | 2010-01-19 17:11:36 -0600
---|---|---
committer | Derek Hower <drh5@cs.wisc.edu> | 2010-01-19 17:11:36 -0600
commit | 07ea0891f1699f6194a05516948ce3824fb8fb38 (patch) |
tree | b5c22e3fe49a7e0d277fdb9ac5ee87c2aa0321e5 /src/mem/ruby |
parent | 279f179babc9e5663156777c533c06edc91bce9a (diff) |
download | gem5-07ea0891f1699f6194a05516948ce3824fb8fb38.tar.xz |
ruby: new atomics implementation
This patch changes the way that Ruby handles atomic RMW instructions. Unlike the prior implementation, this one is protocol independent. It works by locking an address from the sequencer immediately after the read portion of an RMW completes. While the address is locked, the coherence controller will satisfy requests coming from only one port (e.g., the mandatory queue) and will ignore all others. After the write portion completes, the line is unlocked. This should also work for multi-line atomics, as long as the blocks are always acquired in the same order.
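For intuition only, here is a minimal, self-contained sketch of the bookkeeping a controller could keep to honor the blockOnQueue()/unblock() interface this patch adds (see the AbstractController.hh hunk below). The BlockedAddressTable class and the toy Address/MessageBuffer types are illustrative assumptions, not the actual gem5 implementation:

```cpp
#include <cassert>
#include <cstdint>
#include <map>

// Toy stand-ins; the real gem5 Address and MessageBuffer classes are richer.
using Address = uint64_t;
struct MessageBuffer {};

// While an address is blocked, only messages arriving on the recorded buffer
// (e.g. the sequencer's mandatory queue, where the RMW write will eventually
// show up) are serviced; messages from every other port are held off.
class BlockedAddressTable {
  public:
    void blockOnQueue(Address addr, MessageBuffer* buf) {
        assert(m_blocked.count(addr) == 0);  // one RMW owner per line
        m_blocked[addr] = buf;
    }

    void unblock(Address addr) { m_blocked.erase(addr); }

    // May a message for 'addr' that arrived on 'from' be serviced now?
    bool mayService(Address addr, const MessageBuffer* from) const {
        auto it = m_blocked.find(addr);
        return it == m_blocked.end() || it->second == from;
    }

  private:
    std::map<Address, MessageBuffer*> m_blocked;
};
```

With this kind of scheme the line is effectively locked between the read and write halves of the RMW, and multi-line atomics stay deadlock-free as long as every thread blocks the lines in the same order.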
Diffstat (limited to 'src/mem/ruby')
-rw-r--r-- | src/mem/ruby/buffers/MessageBuffer.hh | 5
-rw-r--r-- | src/mem/ruby/config/MI_example-homogeneous.rb | 2
-rw-r--r-- | src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb | 6
-rw-r--r-- | src/mem/ruby/config/cfg.rb | 12
-rw-r--r-- | src/mem/ruby/libruby.cc | 3
-rw-r--r-- | src/mem/ruby/slicc_interface/AbstractController.hh | 5
-rw-r--r-- | src/mem/ruby/system/Sequencer.cc | 47
-rw-r--r-- | src/mem/ruby/system/Sequencer.hh | 2
8 files changed, 20 insertions, 62 deletions
diff --git a/src/mem/ruby/buffers/MessageBuffer.hh b/src/mem/ruby/buffers/MessageBuffer.hh
index 8440c3335..950423ee5 100644
--- a/src/mem/ruby/buffers/MessageBuffer.hh
+++ b/src/mem/ruby/buffers/MessageBuffer.hh
@@ -64,6 +64,11 @@ public:
             (m_prio_heap.peekMin().m_time <= g_eventQueue_ptr->getTime()));
   }
 
+  void delayHead() {
+    MessageBufferNode node = m_prio_heap.extractMin();
+    enqueue(node.m_msgptr, 1);
+  }
+
   bool areNSlotsAvailable(int n);
   int getPriority() { return m_priority_rank; }
   void setPriority(int rank) { m_priority_rank = rank; }
diff --git a/src/mem/ruby/config/MI_example-homogeneous.rb b/src/mem/ruby/config/MI_example-homogeneous.rb
index 71e20c318..d409e6782 100644
--- a/src/mem/ruby/config/MI_example-homogeneous.rb
+++ b/src/mem/ruby/config/MI_example-homogeneous.rb
@@ -13,7 +13,7 @@ RubySystem.reset
 
 # default values
 num_cores = 2
-l1_cache_size_kb = 32768
+l1_cache_size_bytes = 32768
 l1_cache_assoc = 8
 l1_cache_latency = 1
 num_memories = 2
diff --git a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
index a8ef1eceb..ee22df656 100644
--- a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
+++ b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
@@ -68,8 +68,8 @@ assert((protocol == "MESI_CMP_directory" or protocol == "MOESI_CMP_directory"),
 require protocol+".rb"
 
 num_cores.times { |n|
-  icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
-  dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
+  icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb*1024, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
+  dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb*1024, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
   sequencer = Sequencer.new("Sequencer_"+n.to_s, icache, dcache)
   iface_ports << sequencer
   if protocol == "MOESI_CMP_directory"
@@ -87,7 +87,7 @@ num_cores.times { |n|
   end
 }
 num_l2_banks.times { |n|
-  cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_kb/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
+  cache = SetAssociativeCache.new("l2u_"+n.to_s, (l2_cache_size_kb*1024)/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
   if protocol == "MOESI_CMP_directory"
     net_ports << MOESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
                                                            "L2Cache",
diff --git a/src/mem/ruby/config/cfg.rb b/src/mem/ruby/config/cfg.rb
index d57862420..a20562243 100644
--- a/src/mem/ruby/config/cfg.rb
+++ b/src/mem/ruby/config/cfg.rb
@@ -385,12 +385,12 @@ class DMAController < NetPort
 end
 
 class Cache < LibRubyObject
-  param :size_kb, Integer
+  param :size, Integer
   param :latency, Integer
   param :controller, NetPort
-  def initialize(obj_name, size_kb, latency)
+  def initialize(obj_name, size, latency)
     super(obj_name)
-    self.size_kb = size_kb
+    self.size = size
     self.latency = latency
     # controller must be set manually by the configuration script
     # because there is a cyclic dependence
@@ -406,8 +406,8 @@ class SetAssociativeCache < Cache
   # when an integer, it represents the number of cycles for a hit
   # when a float, it represents the cache access time in ns
   # when set to "auto", libruby will attempt to find a realistic latency by running CACTI
-  def initialize(obj_name, size_kb, latency, assoc, replacement_policy)
-    super(obj_name, size_kb, latency)
+  def initialize(obj_name, size, latency, assoc, replacement_policy)
+    super(obj_name, size, latency)
     self.assoc = assoc
     self.replacement_policy = replacement_policy
   end
@@ -415,7 +415,7 @@ class SetAssociativeCache < Cache
   def calculateLatency()
     if self.latency == "auto"
       cacti_args = Array.new()
-      cacti_args << (self.size_kb*1024) << RubySystem.block_size_bytes << self.assoc
+      cacti_args << (self.size*1024) << RubySystem.block_size_bytes << self.assoc
       cacti_args << 1 << 0 << 0 << 0 << 1
       cacti_args << RubySystem.tech_nm << RubySystem.block_size_bytes*8
       cacti_args << 0 << 0 << 0 << 1 << 0 << 0 << 0 << 0 << 1
diff --git a/src/mem/ruby/libruby.cc b/src/mem/ruby/libruby.cc
index b9a72d071..57dd13c87 100644
--- a/src/mem/ruby/libruby.cc
+++ b/src/mem/ruby/libruby.cc
@@ -58,11 +58,8 @@ RubyRequestType string_to_RubyRequestType(std::string str)
 
 ostream& operator<<(ostream& out, const RubyRequestType& obj)
 {
-  cerr << "in op" << endl;
   out << RubyRequestType_to_string(obj);
-  cerr << "flushing" << endl;
   out << flush;
-  cerr << "done" << endl;
   return out;
 }
 
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index 1d1c56aba..c7062262a 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -21,9 +21,8 @@ public:
   virtual const string toString() const = 0;  // returns text version of controller type
   virtual const string getName() const = 0;   // return instance name
   virtual const MachineType getMachineType() const = 0;
-  virtual void set_atomic(Address addr) = 0;
-  virtual void clear_atomic(Address addr) = 0;
-  virtual void reset_atomics() = 0;
+  virtual void blockOnQueue(Address, MessageBuffer*) = 0;
+  virtual void unblock(Address) = 0;
 
   virtual void print(ostream & out) const = 0;
   virtual void printStats(ostream & out) const = 0;
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index a8b4b075a..b4716c346 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -66,8 +66,6 @@ void Sequencer::init(const vector<string> & argv)
   m_instCache_ptr = NULL;
   m_dataCache_ptr = NULL;
   m_controller = NULL;
-  m_atomic_reads = 0;
-  m_atomic_writes = 0;
   for (size_t i=0; i<argv.size(); i+=2) {
     if ( argv[i] == "controller") {
       m_controller = RubySystem::getController(argv[i+1]); // args[i] = "L1Cache"
@@ -285,15 +283,15 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
          (request->ruby_request.type == RubyRequestType_RMW_Write) ||
          (request->ruby_request.type == RubyRequestType_Locked_Read) ||
          (request->ruby_request.type == RubyRequestType_Locked_Write));
-  // POLINA: the assumption is that atomics are only on data cache and not instruction cache
+
   if (request->ruby_request.type == RubyRequestType_Locked_Read) {
     m_dataCache_ptr->setLocked(address, m_version);
   }
   else if (request->ruby_request.type == RubyRequestType_RMW_Read) {
-    m_controller->set_atomic(address);
+    m_controller->blockOnQueue(address, m_mandatory_q_ptr);
   }
   else if (request->ruby_request.type == RubyRequestType_RMW_Write) {
-    m_controller->clear_atomic(address);
+    m_controller->unblock(address);
   }
 
   hitCallback(request, data);
@@ -438,42 +436,12 @@ void Sequencer::issueRequest(const RubyRequest& request) {
   CacheRequestType ctype;
   switch(request.type) {
   case RubyRequestType_IFETCH:
-    if (m_atomic_reads > 0 && m_atomic_writes == 0) {
-      m_controller->reset_atomics();
-      m_atomic_writes = 0;
-      m_atomic_reads = 0;
-    }
-    else if (m_atomic_writes > 0) {
-      assert(m_atomic_reads > m_atomic_writes);
-      cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
-      assert(false);
-    }
     ctype = CacheRequestType_IFETCH;
     break;
   case RubyRequestType_LD:
-    if (m_atomic_reads > 0 && m_atomic_writes == 0) {
-      m_controller->reset_atomics();
-      m_atomic_writes = 0;
-      m_atomic_reads = 0;
-    }
-    else if (m_atomic_writes > 0) {
-      assert(m_atomic_reads > m_atomic_writes);
-      cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
-      assert(false);
-    }
     ctype = CacheRequestType_LD;
     break;
   case RubyRequestType_ST:
-    if (m_atomic_reads > 0 && m_atomic_writes == 0) {
-      m_controller->reset_atomics();
-      m_atomic_writes = 0;
-      m_atomic_reads = 0;
-    }
-    else if (m_atomic_writes > 0) {
-      assert(m_atomic_reads > m_atomic_writes);
-      cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
-      assert(false);
-    }
     ctype = CacheRequestType_ST;
     break;
   case RubyRequestType_Locked_Read:
@@ -481,18 +449,9 @@ void Sequencer::issueRequest(const RubyRequest& request) {
     ctype = CacheRequestType_ATOMIC;
     break;
   case RubyRequestType_RMW_Read:
-    assert(m_atomic_writes == 0);
-    m_atomic_reads++;
     ctype = CacheRequestType_ATOMIC;
     break;
   case RubyRequestType_RMW_Write:
-    assert(m_atomic_reads > 0);
-    assert(m_atomic_writes < m_atomic_reads);
-    m_atomic_writes++;
-    if (m_atomic_reads == m_atomic_writes) {
-      m_atomic_reads = 0;
-      m_atomic_writes = 0;
-    }
     ctype = CacheRequestType_ATOMIC;
     break;
   default:
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index ce53dd8d7..1621bbbdc 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -128,8 +128,6 @@ private:
   // Global outstanding request count, across all request tables
   int m_outstanding_count;
   bool m_deadlock_check_scheduled;
-  int m_atomic_reads;
-  int m_atomic_writes;
 
   int m_store_waiting_on_load_cycles;
   int m_store_waiting_on_store_cycles;
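The new MessageBuffer::delayHead() helper above simply re-enqueues the head message with a one-cycle delay, which gives a blocked controller a clean way to defer messages it is not yet allowed to service. A rough, hypothetical usage sketch follows; the ToyQueue and Message types are stand-ins for illustration, not gem5 code:

```cpp
#include <cstdint>
#include <deque>
#include <set>

// Hypothetical stand-ins for this sketch only.
using Address = uint64_t;
struct Message { Address addr; };

// Simplified stand-in for a MessageBuffer; the real delayHead() re-enqueues
// the head message with a one-cycle delay rather than rotating it to the back.
struct ToyQueue {
    std::deque<Message> q;
    bool isReady() const { return !q.empty(); }
    const Message& peek() const { return q.front(); }
    void dequeue() { q.pop_front(); }
    void delayHead() { Message m = q.front(); q.pop_front(); q.push_back(m); }
};

// Service an input port, deferring messages whose line is currently locked
// for a different port while an RMW is in flight.
void serviceQueue(ToyQueue& queue, const std::set<Address>& lockedForOtherPort)
{
    while (queue.isReady()) {
        Message msg = queue.peek();
        if (lockedForOtherPort.count(msg.addr)) {
            queue.delayHead();  // try this message again later
            break;
        }
        // ... the normal protocol transition for 'msg' would run here ...
        queue.dequeue();
    }
}
```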