9 files changed, 189 insertions, 98 deletions
diff --git a/src/mem/ruby/system/CacheMemory.cc b/src/mem/ruby/system/CacheMemory.cc
index 630b94542..cf3e094ad 100644
--- a/src/mem/ruby/system/CacheMemory.cc
+++ b/src/mem/ruby/system/CacheMemory.cc
@@ -83,10 +83,8 @@ void CacheMemory::init(const vector<string> & argv)
     }
   }
 
-  assert(cache_size != -1);
-  
-  m_cache_num_sets = (cache_size / m_cache_assoc) / RubySystem::getBlockSizeBytes();
-  assert(m_cache_num_sets > 1);
+  int num_lines = cache_size/RubySystem::getBlockSizeBytes();
+  m_cache_num_sets = num_lines / m_cache_assoc;
   m_cache_num_set_bits = log_int(m_cache_num_sets);
   assert(m_cache_num_set_bits > 0);
 
@@ -122,7 +120,7 @@ CacheMemory::~CacheMemory()
 }
 
 int
-CacheMemory::numberOfLastLevelCaches() 
+CacheMemory::numberOfLastLevelCaches()
 { 
   return m_num_last_level_caches; 
 }
@@ -165,13 +163,10 @@ int CacheMemory::findTagInSet(Index cacheSet, const Address& tag) const
 {
   assert(tag == line_address(tag));
   // search the set for the tags
-  for (int i=0; i < m_cache_assoc; i++) {
-    if ((m_cache[cacheSet][i] != NULL) &&
-        (m_cache[cacheSet][i]->m_Address == tag) &&
-        (m_cache[cacheSet][i]->m_Permission != AccessPermission_NotPresent)) {
-      return i;
-    }
-  }
+  m5::hash_map<Address, int>::const_iterator it = m_tag_index.find(tag);
+  if (it != m_tag_index.end())
+    if (m_cache[cacheSet][it->second]->m_Permission != AccessPermission_NotPresent)
+      return it->second;
   return -1; // Not found
 }
 
@@ -181,10 +176,9 @@ int CacheMemory::findTagInSetIgnorePermissions(Index cacheSet, const Address& ta
 {
   assert(tag == line_address(tag));
   // search the set for the tags
-  for (int i=0; i < m_cache_assoc; i++) {
-    if (m_cache[cacheSet][i] != NULL && m_cache[cacheSet][i]->m_Address == tag)
-      return i;
-  }
+  m5::hash_map<Address, int>::const_iterator it = m_tag_index.find(tag);
+  if (it != m_tag_index.end())
+    return it->second;
   return -1; // Not found
 }
 
@@ -291,6 +285,7 @@ void CacheMemory::allocate(const Address& address, AbstractCacheEntry* entry)
       m_cache[cacheSet][i]->m_Address = address;
       m_cache[cacheSet][i]->m_Permission = AccessPermission_Invalid;
       m_locked[cacheSet][i] = -1;
+      m_tag_index[address] = i;
 
       m_replacementPolicy_ptr->touch(cacheSet, i, g_eventQueue_ptr->getTime());
 
@@ -311,6 +306,7 @@ void CacheMemory::deallocate(const Address& address)
     delete m_cache[cacheSet][location];
     m_cache[cacheSet][location] = NULL;
     m_locked[cacheSet][location] = -1;
+    m_tag_index.erase(address);
   }
 }
 
diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh
index 856b7bcac..8b84f33ec 100644
--- a/src/mem/ruby/system/CacheMemory.hh
+++ b/src/mem/ruby/system/CacheMemory.hh
@@ -54,6 +54,7 @@
 #include "mem/ruby/slicc_interface/AbstractController.hh"
 #include "mem/ruby/profiler/CacheProfiler.hh"
 #include "mem/protocol/CacheMsg.hh"
+#include "base/hashmap.hh"
 #include <vector>
 
 class CacheMemory {
@@ -70,8 +71,6 @@ public:
   //  static CacheMemory* createCache(int level, int num, char split_type, AbstractCacheEntry* (*entry_factory)());
   //  static CacheMemory* getCache(int cache_id);
 
-  static int numberOfLastLevelCaches();
-  
   // Public Methods
   void printConfig(ostream& out);
 
@@ -106,6 +105,8 @@ public:
   AccessPermission getPermission(const Address& address) const;
   void changePermission(const Address& address, AccessPermission new_perm);
 
+  static int numberOfLastLevelCaches();
+
   int getLatency() const { return m_latency; }
 
   // Hook for checkpointing the contents of the cache
@@ -158,6 +159,7 @@ private:
 
   // The first index is the # of cache lines.
   // The second index is the the amount associativity.
+  m5::hash_map<Address, int> m_tag_index;  // line address -> way index in its set (allocate() fills it); the comment above describes m_cache
   Vector<Vector<AbstractCacheEntry*> > m_cache;
   Vector<Vector<int> > m_locked;
 
@@ -169,9 +171,11 @@ private:
   int m_cache_num_set_bits;
   int m_cache_assoc;
 
+  static Vector< CacheMemory* > m_all_caches;
+  
   static int m_num_last_level_caches;
   static MachineType m_last_level_machine_type;
-  static Vector< CacheMemory* > m_all_caches;
+
 };
 
 #endif //CACHEMEMORY_H
diff --git a/src/mem/ruby/system/DMASequencer.hh b/src/mem/ruby/system/DMASequencer.hh
index 1f60b95ec..77c0a2258 100644
--- a/src/mem/ruby/system/DMASequencer.hh
+++ b/src/mem/ruby/system/DMASequencer.hh
@@ -25,6 +25,7 @@ public:
   void init(const vector<string> & argv);
   /* external interface */
   int64_t makeRequest(const RubyRequest & request);
+  bool isReady(const RubyRequest & request, bool dont_set = false) { assert(0); return false; }  // stub: always asserts; returns false only when assert() is compiled out
   //  void issueRequest(uint64_t paddr, uint8* data, int len, bool rw);
   bool busy() { return m_is_busy;}
 
diff --git a/src/mem/ruby/system/DirectoryMemory.cc b/src/mem/ruby/system/DirectoryMemory.cc
index e230059ad..9b2a3873c 100644
--- a/src/mem/ruby/system/DirectoryMemory.cc
+++ b/src/mem/ruby/system/DirectoryMemory.cc
@@ -44,7 +44,7 @@
 
 int DirectoryMemory::m_num_directories = 0;
 int DirectoryMemory::m_num_directories_bits = 0;
-int DirectoryMemory::m_total_size_bytes = 0;
+uint64_t DirectoryMemory::m_total_size_bytes = 0;
 
 DirectoryMemory::DirectoryMemory(const string & name)
  : m_name(name)
diff --git a/src/mem/ruby/system/DirectoryMemory.hh b/src/mem/ruby/system/DirectoryMemory.hh
index 39de679ed..09211fd83 100644
--- a/src/mem/ruby/system/DirectoryMemory.hh
+++ b/src/mem/ruby/system/DirectoryMemory.hh
@@ -91,7 +91,7 @@ private:
 
   static int m_num_directories;
   static int m_num_directories_bits;
-  static int m_total_size_bytes;
+  static uint64_t m_total_size_bytes;
 
   MemoryVector* m_ram;
 };
diff --git a/src/mem/ruby/system/MemoryVector.hh b/src/mem/ruby/system/MemoryVector.hh
index c5f3cea7f..775244840 100644
--- a/src/mem/ruby/system/MemoryVector.hh
+++ b/src/mem/ruby/system/MemoryVector.hh
@@ -21,61 +21,105 @@ class MemoryVector {
   void write(const Address & paddr, uint8* data, int len);
   uint8* read(const Address & paddr, uint8* data, int len);
 
- private:
-  uint8* getBlockPtr(const Address & paddr);
+private:
+  uint8* getBlockPtr(const PhysAddress & addr);
 
   uint32 m_size;
-  uint8* m_vec;
+  uint8** m_pages;
+  uint32 m_num_pages;
+  const uint32 m_page_offset_mask;
 };
 
 inline
 MemoryVector::MemoryVector()
+  : m_page_offset_mask(4095)  // low 12 address bits = byte offset inside a 4096-byte page
 {
   m_size = 0;
-  m_vec = NULL;
+  m_num_pages = 0;
+  m_pages = NULL;  // no page table yet; setSize() allocates it
 }
 
 inline
 MemoryVector::MemoryVector(uint32 size)
+  : m_pages(NULL), m_num_pages(0), m_page_offset_mask(4095)  // setSize() tests and frees m_pages, so it must start out NULL
 {
-  m_size = size;
-  m_vec = new uint8[size];
+  setSize(size);
 }
 
 inline
 MemoryVector::~MemoryVector()
 {
-  delete [] m_vec;
+  // Free every page that was materialised, then the page table itself.
+  // delete[] of a null pointer is a no-op, so untouched pages need no test.
+  for (uint32 i = 0; i < m_num_pages; i++) {
+    delete [] m_pages[i];
+  }
+  delete [] m_pages;
 }
 
 inline
 void MemoryVector::setSize(uint32 size)
 {
+  // Release any previous backing store: each materialised page, then the
+  // page table. delete[] ignores null, so no per-page test is needed.
+  if (m_pages != NULL) {
+    for (uint32 i = 0; i < m_num_pages; i++) {
+      delete [] m_pages[i];
+    }
+    delete [] m_pages;
+  }
   m_size = size;
-  if (m_vec != NULL)
-    delete [] m_vec;
-  m_vec = new uint8[size];
+  assert(size%4096 == 0);  // storage is managed in whole 4096-byte pages
+  m_num_pages = size >> 12;
+  m_pages = new uint8*[m_num_pages];
+  memset(m_pages, 0, m_num_pages * sizeof(uint8*));  // every page starts absent (null)
 }
 
 inline
 void MemoryVector::write(const Address & paddr, uint8* data, int len)
 {
   assert(paddr.getAddress() + len <= m_size);
-  memcpy(m_vec + paddr.getAddress(), data, len);
+  const uint32 page_num = paddr.getAddress() >> 12;
+  const uint32 offset = paddr.getAddress() & m_page_offset_mask;
+  if (m_pages[page_num] == 0) {
+    // An absent page reads back as zeros, so an all-zero write to it is a
+    // no-op and must not force the page to be allocated.
+    bool has_nonzero = false;
+    for (int i = 0; i < len && !has_nonzero; i++) {
+      has_nonzero = (data[i] != 0);
+    }
+    if (!has_nonzero) {
+      return;
+    }
+    m_pages[page_num] = new uint8[4096];
+    memset(m_pages[page_num], 0, 4096);
+  }
+  // NOTE(review): assumes the write does not cross a 4096-byte page -- confirm.
+  memcpy(&m_pages[page_num][offset], data, len);
 }
 
 inline
 uint8* MemoryVector::read(const Address & paddr, uint8* data, int len)
 {
   assert(paddr.getAddress() + len <= m_size);
-  memcpy(data, m_vec + paddr.getAddress(), len);
+  const uint32 page_num = paddr.getAddress() >> 12;
+  if (m_pages[page_num] != 0) {
+    memcpy(data, m_pages[page_num] + (paddr.getAddress() & m_page_offset_mask), len);
+  } else {
+    memset(data, 0, len);  // a page that was never materialised reads as zeros
+  }
   return data;
 }
 
 inline
-uint8* MemoryVector::getBlockPtr(const Address & paddr)
+uint8* MemoryVector::getBlockPtr(const PhysAddress & paddr)
 {
-  return m_vec + paddr.getAddress();
+  const uint32 page_num = paddr.getAddress() >> 12;
+  uint8*& page = m_pages[page_num];
+  if (page == 0) {
+    page = new uint8[4096]();  // first touch: materialise the page zero-filled
+  }
+  return page + (paddr.getAddress() & m_page_offset_mask);
 }
 
 #endif // MEMORYVECTOR_H
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index c693e0f37..a8b4b075a 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -27,6 +27,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "mem/ruby/libruby.hh"
 #include "mem/ruby/common/Global.hh"
 #include "mem/ruby/system/Sequencer.hh"
 #include "mem/ruby/system/System.hh"
@@ -44,14 +45,14 @@
 //Sequencer::Sequencer(int core_id, MessageBuffer* mandatory_q)
 
 #define LLSC_FAIL -2
-ostream& operator<<(std::ostream& out, const SequencerRequest& obj) {
-  out << obj.ruby_request << flush;
-  return out;
-}
-
+long int already = 0;
 Sequencer::Sequencer(const string & name)
   :RubyPort(name)
 {
+  // Stall counters bumped by isReady() when a request aliases an
+  // outstanding one and reported by printStats(); all start at zero.
+  m_store_waiting_on_load_cycles = m_store_waiting_on_store_cycles = 0;
+  m_load_waiting_on_store_cycles = m_load_waiting_on_load_cycles = 0;
 }
 
 void Sequencer::init(const vector<string> & argv)
@@ -65,8 +66,8 @@ void Sequencer::init(const vector<string> & argv)
   m_instCache_ptr = NULL;
   m_dataCache_ptr = NULL;
   m_controller = NULL;
-  m_servicing_atomic = -1;
-  m_atomics_counter = 0;
+  m_atomic_reads = 0;
+  m_atomic_writes = 0;
   for (size_t i=0; i<argv.size(); i+=2) {
     if ( argv[i] == "controller") {
       m_controller = RubySystem::getController(argv[i+1]); // args[i] = "L1Cache"
@@ -110,8 +111,9 @@ void Sequencer::wakeup() {
     SequencerRequest* request = m_readRequestTable.lookup(keys[i]);
     if (current_time - request->issue_time >= m_deadlock_threshold) {
       WARN_MSG("Possible Deadlock detected");
-      WARN_EXPR(request->ruby_request);
+      WARN_EXPR(request);
       WARN_EXPR(m_version);
+      WARN_EXPR(request->ruby_request.paddr);
       WARN_EXPR(keys.size());
       WARN_EXPR(current_time);
       WARN_EXPR(request->issue_time);
@@ -125,7 +127,7 @@ void Sequencer::wakeup() {
     SequencerRequest* request = m_writeRequestTable.lookup(keys[i]);
     if (current_time - request->issue_time >= m_deadlock_threshold) {
       WARN_MSG("Possible Deadlock detected");
-      WARN_EXPR(request->ruby_request);
+      WARN_EXPR(request);
       WARN_EXPR(m_version);
       WARN_EXPR(current_time);
       WARN_EXPR(request->issue_time);
@@ -145,6 +147,14 @@ void Sequencer::wakeup() {
   }
 }
 
+void Sequencer::printStats(ostream & out) const {  // dump the four aliasing-stall counters
+  out << "Sequencer: " << m_name << endl
+      << "  store_waiting_on_load_cycles: " << m_store_waiting_on_load_cycles << endl
+      << "  store_waiting_on_store_cycles: " << m_store_waiting_on_store_cycles << endl
+      << "  load_waiting_on_load_cycles: " << m_load_waiting_on_load_cycles << endl
+      << "  load_waiting_on_store_cycles: " << m_load_waiting_on_store_cycles << endl;
+}
+
 void Sequencer::printProgress(ostream& out) const{
   /*
   int total_demand = 0;
@@ -267,6 +277,7 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
   assert(m_writeRequestTable.exist(line_address(address)));
 
   SequencerRequest* request = m_writeRequestTable.lookup(address);
+
   removeRequest(request);
 
   assert((request->ruby_request.type == RubyRequestType_ST) ||
@@ -282,7 +293,7 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
     m_controller->set_atomic(address);
   }
   else if (request->ruby_request.type == RubyRequestType_RMW_Write) {
-    m_controller->clear_atomic();
+    m_controller->clear_atomic(address);
   }
 
   hitCallback(request, data);
@@ -354,47 +365,33 @@ void Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data) {
 }
 
 // Returns true if the sequencer already has a load or store outstanding
-bool Sequencer::isReady(const RubyRequest& request) {
-  // POLINA: check if we are currently flushing the write buffer, if so Ruby is returned as not ready
-  // to simulate stalling of the front-end
-  // Do we stall all the sequencers? If it is atomic instruction - yes!
-  if (m_outstanding_count >= m_max_outstanding_requests) {
-    return false;
-  }
-
-  if( m_writeRequestTable.exist(line_address(Address(request.paddr))) ||
-      m_readRequestTable.exist(line_address(Address(request.paddr))) ){
-    //cout << "OUTSTANDING REQUEST EXISTS " << p << " VER " << m_version << endl;
-    //printProgress(cout);
-    return false;
-  }
-
-  if (m_servicing_atomic != -1 && m_servicing_atomic != (int)request.proc_id) {
-    assert(m_atomics_counter > 0);
-    return false;
-  }
-  else {
-    if (request.type == RubyRequestType_RMW_Read) {
-      if (m_servicing_atomic == -1) {
-        assert(m_atomics_counter == 0);
-        m_servicing_atomic = (int)request.proc_id;
-      }
-      else {
-        assert(m_servicing_atomic == (int)request.proc_id);
-      }
-      m_atomics_counter++;
+int Sequencer::isReady(const RubyRequest& request) {  // > 0 means ready; makeRequest() hands any other value back to its caller
+  bool is_outstanding_store = m_writeRequestTable.exist(line_address(Address(request.paddr)));  // lookup is by line address
+  bool is_outstanding_load = m_readRequestTable.exist(line_address(Address(request.paddr)));
+  if ( is_outstanding_store ) {
+    if ((request.type == RubyRequestType_LD) ||
+        (request.type == RubyRequestType_IFETCH) ||
+        (request.type == RubyRequestType_RMW_Read)) {
+      m_store_waiting_on_load_cycles++;  // incoming LD/IFETCH/RMW_Read hits a line in the write request table
+    } else {
+      m_store_waiting_on_store_cycles++;  // any other request type hitting a line in the write request table
     }
-    else if (request.type == RubyRequestType_RMW_Write) {
-      assert(m_servicing_atomic == (int)request.proc_id);
-      assert(m_atomics_counter > 0);
-      m_atomics_counter--;
-      if (m_atomics_counter == 0) {
-        m_servicing_atomic = -1;
-      }
+    return LIBRUBY_ALIASED_REQUEST;
+  } else if ( is_outstanding_load ) {
+    if ((request.type == RubyRequestType_ST) ||
+        (request.type == RubyRequestType_RMW_Write) ) {
+      m_load_waiting_on_store_cycles++;  // incoming ST/RMW_Write hits a line in the read request table
+    } else {
+      m_load_waiting_on_load_cycles++;  // any other request type hitting a line in the read request table
     }
+    return LIBRUBY_ALIASED_REQUEST;
   }
 
-  return true;
+  if (m_outstanding_count >= m_max_outstanding_requests) {  // tested after the alias checks, so aliasing is reported first
+    return LIBRUBY_BUFFER_FULL;
+  }
+  
+  return 1;  // ready: no aliasing request and room for another outstanding request
 }
 
 bool Sequencer::empty() const {
@@ -405,11 +402,12 @@ bool Sequencer::empty() const {
 int64_t Sequencer::makeRequest(const RubyRequest & request)
 {
   assert(Address(request.paddr).getOffset() + request.len <= RubySystem::getBlockSizeBytes());
-  if (isReady(request)) {
+  int ready = isReady(request);
+  if (ready > 0) {
     int64_t id = makeUniqueRequestID();
     SequencerRequest *srequest = new SequencerRequest(request, id, g_eventQueue_ptr->getTime());
     bool found = insertRequest(srequest);
-    if (!found)
+    if (!found) {
       if (request.type == RubyRequestType_Locked_Write) {
         // NOTE: it is OK to check the locked flag here as the mandatory queue will be checked first
         // ensuring that nothing comes between checking the flag and servicing the store
@@ -420,16 +418,17 @@ int64_t Sequencer::makeRequest(const RubyRequest & request)
           m_dataCache_ptr->clearLocked(line_address(Address(request.paddr)));
         }
       }
-      if (request.type == RubyRequestType_RMW_Write) {
-        m_controller->started_writes();
-      }
       issueRequest(request);
 
-    // TODO: issue hardware prefetches here
-    return id;
-  }
-  else {
-    return -1;
+      // TODO: issue hardware prefetches here
+      return id;
+    }
+    else {
+      assert(0);
+      return 0;
+    }
+  } else {
+    return ready;
   }
 }
 
@@ -439,24 +438,61 @@ void Sequencer::issueRequest(const RubyRequest& request) {
   CacheRequestType ctype;
   switch(request.type) {
   case RubyRequestType_IFETCH:
+    if (m_atomic_reads > 0 && m_atomic_writes == 0) {
+      m_controller->reset_atomics();
+      m_atomic_writes = 0;
+      m_atomic_reads = 0;
+    }
+    else if (m_atomic_writes > 0) {
+      assert(m_atomic_reads > m_atomic_writes);
+      cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
+      assert(false);
+    }
     ctype = CacheRequestType_IFETCH;
     break;
   case RubyRequestType_LD:
+    if (m_atomic_reads > 0 && m_atomic_writes == 0) {
+      m_controller->reset_atomics();
+      m_atomic_writes = 0;
+      m_atomic_reads = 0;
+    }
+    else if (m_atomic_writes > 0) {
+      assert(m_atomic_reads > m_atomic_writes);
+      cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
+      assert(false);
+    }
     ctype = CacheRequestType_LD;
     break;
   case RubyRequestType_ST:
+    if (m_atomic_reads > 0 && m_atomic_writes == 0) {
+      m_controller->reset_atomics();
+      m_atomic_writes = 0;
+      m_atomic_reads = 0;
+    }
+    else if (m_atomic_writes > 0) {
+      assert(m_atomic_reads > m_atomic_writes);
+      cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
+      assert(false);
+    }
     ctype = CacheRequestType_ST;
     break;
   case RubyRequestType_Locked_Read:
-    ctype = CacheRequestType_ST;
-    break;
   case RubyRequestType_Locked_Write:
-    ctype = CacheRequestType_ST;
+    ctype = CacheRequestType_ATOMIC;
     break;
   case RubyRequestType_RMW_Read:
+    assert(m_atomic_writes == 0);
+    m_atomic_reads++;
     ctype = CacheRequestType_ATOMIC;
     break;
   case RubyRequestType_RMW_Write:
+    assert(m_atomic_reads > 0);
+    assert(m_atomic_writes < m_atomic_reads);
+    m_atomic_writes++;
+    if (m_atomic_reads == m_atomic_writes) {
+      m_atomic_reads = 0;
+      m_atomic_writes = 0;
+    }
     ctype = CacheRequestType_ATOMIC;
     break;
   default:
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index cf12c2a0b..ce53dd8d7 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -86,10 +86,11 @@ public:
 
   // called by Tester or Simics
   int64_t makeRequest(const RubyRequest & request);
-  bool isReady(const RubyRequest& request);
+  int isReady(const RubyRequest& request);
   bool empty() const;
 
   void print(ostream& out) const;
+  void printStats(ostream & out) const;
   void checkCoherence(const Address& address);
 
   //  bool getRubyMemoryValue(const Address& addr, char* value, unsigned int size_in_bytes);
@@ -127,8 +128,13 @@ private:
   // Global outstanding request count, across all request tables
   int m_outstanding_count;
   bool m_deadlock_check_scheduled;
-  int m_servicing_atomic;
-  int m_atomics_counter;
+  int m_atomic_reads;
+  int m_atomic_writes;
+
+  int m_store_waiting_on_load_cycles;
+  int m_store_waiting_on_store_cycles;
+  int m_load_waiting_on_store_cycles;
+  int m_load_waiting_on_load_cycles;
 };
 
 // Output operator declaration
diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc
index ad67cdc80..4ce919618 100644
--- a/src/mem/ruby/system/System.cc
+++ b/src/mem/ruby/system/System.cc
@@ -335,6 +335,10 @@ void RubySystem::printStats(ostream& out)
 
   m_profiler_ptr->printStats(out);
   m_network_ptr->printStats(out);
+  for (map<string, Sequencer*>::const_iterator it = m_sequencers.begin();
+       it != m_sequencers.end(); it++) {
+    (*it).second->printStats(out);
+  }
   for (map<string, CacheMemory*>::const_iterator it = m_caches.begin();
        it != m_caches.end(); it++) {
     (*it).second->printStats(out);