39 files changed, 591 insertions, 1437 deletions
diff --git a/.hgtags b/.hgtags
index 105dc1895..61c0396b9 100644
--- a/.hgtags
+++ b/.hgtags
@@ -18,3 +18,4 @@ dce5a8655829b7d2e24ce40cafc9c8873a71671f m5_2.0_beta5
 1ac44b6c87ec71a8410c9a9c219269eca71f8077 m5_2.0_beta4
 60a931b03fb165807f02bcccc4f7d0fd705a67a9 copyright_update
 d8b246a665c160a31751b4091f097022cde16dd7 m5_2.0_beta6
+5de565c4b7bdf46670611858b709c1eb50ad7c5c Calvin_Submission
diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
index 32669190f..8c8bbfaa6 100644
--- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
@@ -27,12 +27,6 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/*
- * $Id: MSI_MOSI_CMP_directory-L1cache.sm 1.10 05/01/19 15:55:40-06:00 beckmann@s0-28.cs.wisc.edu $
- *
- */
-
-
 machine(L1Cache, "MSI Directory L1 Cache CMP")
  : int l1_request_latency,
    int l1_response_latency,
@@ -47,15 +41,15 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
   // a local L1 -> this L2 bank, currently ordered with directory forwarded requests
   MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false";
   // a local L1 -> this L2 bank
-  MessageBuffer responseFromL1Cache, network="To", virtual_network="3", ordered="false";
-  MessageBuffer unblockFromL1Cache, network="To", virtual_network="4", ordered="false";
+  MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false";
+  MessageBuffer unblockFromL1Cache, network="To", virtual_network="2", ordered="false";
 
 
   // To this node's L1 cache FROM the network
   // a L2 bank -> this L1
-  MessageBuffer requestToL1Cache, network="From", virtual_network="1", ordered="false";
+  MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false";
   // a L2 bank -> this L1
-  MessageBuffer responseToL1Cache, network="From", virtual_network="3", ordered="false";
+  MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false";
 
   // STATES
   enumeration(State, desc="Cache states", default="L1Cache_State_I") {
diff --git a/src/mem/protocol/MESI_CMP_directory-L2cache.sm b/src/mem/protocol/MESI_CMP_directory-L2cache.sm
index 6439e4fb3..b82d77ddb 100644
--- a/src/mem/protocol/MESI_CMP_directory-L2cache.sm
+++ b/src/mem/protocol/MESI_CMP_directory-L2cache.sm
@@ -40,14 +40,14 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
 
   // L2 BANK QUEUES
   // From local bank of L2 cache TO the network
-  MessageBuffer DirRequestFromL2Cache, network="To", virtual_network="2", ordered="false";  // this L2 bank -> Memory
-  MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="1", ordered="false";  // this L2 bank -> a local L1
-  MessageBuffer responseFromL2Cache, network="To", virtual_network="3", ordered="false";  // this L2 bank -> a local L1 || Memory
+  MessageBuffer DirRequestFromL2Cache, network="To", virtual_network="0", ordered="false";  // this L2 bank -> Memory
+  MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="0", ordered="false";  // this L2 bank -> a local L1
+  MessageBuffer responseFromL2Cache, network="To", virtual_network="1", ordered="false";  // this L2 bank -> a local L1 || Memory
 
   // FROM the network to this local bank of L2 cache
-  MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false";  // a local L1 || Memory -> this L2 bank
+  MessageBuffer unblockToL2Cache, network="From", virtual_network="2", ordered="false";  // a local L1 || Memory -> this L2 bank
   MessageBuffer L1RequestToL2Cache, network="From", virtual_network="0", ordered="false";  // a local L1 -> this L2 bank
-  MessageBuffer responseToL2Cache, network="From", virtual_network="3", ordered="false";  // a local L1 || Memory -> this L2 bank
+  MessageBuffer responseToL2Cache, network="From", virtual_network="1", ordered="false";  // a local L1 || Memory -> this L2 bank
 //  MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false";  // a local L1 || Memory -> this L2 bank
 
   // STATES
diff --git a/src/mem/protocol/MESI_CMP_directory-dma.sm b/src/mem/protocol/MESI_CMP_directory-dma.sm
index 191df5dfa..143c465ef 100644
--- a/src/mem/protocol/MESI_CMP_directory-dma.sm
+++ b/src/mem/protocol/MESI_CMP_directory-dma.sm
@@ -3,8 +3,8 @@ machine(DMA, "DMA Controller")
 : int request_latency
 {
 
-  MessageBuffer responseFromDir, network="From", virtual_network="6", ordered="true", no_vector="true";
-  MessageBuffer reqToDirectory, network="To", virtual_network="7", ordered="false", no_vector="true";
+  MessageBuffer responseFromDir, network="From", virtual_network="1", ordered="true", no_vector="true";
+  MessageBuffer reqToDirectory, network="To", virtual_network="0", ordered="false", no_vector="true";
 
   enumeration(State, desc="DMA states", default="DMA_State_READY") {
     READY, desc="Ready to accept a new request";
@@ -51,13 +51,13 @@ machine(DMA, "DMA Controller")
     }
   }
 
-  in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") {
+  in_port(dmaResponseQueue_in, ResponseMsg, responseFromDir, desc="...") {
     if (dmaResponseQueue_in.isReady()) {
-      peek( dmaResponseQueue_in, DMAResponseMsg) {
-        if (in_msg.Type == DMAResponseType:ACK) {
-          trigger(Event:Ack, in_msg.LineAddress);
-        } else if (in_msg.Type == DMAResponseType:DATA) {
-          trigger(Event:Data, in_msg.LineAddress);
+      peek( dmaResponseQueue_in, ResponseMsg) {
+        if (in_msg.Type == CoherenceResponseType:ACK) {
+          trigger(Event:Ack, makeLineAddress(in_msg.Address));
+        } else if (in_msg.Type == CoherenceResponseType:DATA) {
+          trigger(Event:Data, makeLineAddress(in_msg.Address));
         } else {
           error("Invalid response type");
         }
@@ -67,10 +67,9 @@ machine(DMA, "DMA Controller")
 
   action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") {
     peek(dmaRequestQueue_in, SequencerMsg) {
-      enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) {
-        out_msg.PhysicalAddress := in_msg.PhysicalAddress;
-        out_msg.LineAddress := in_msg.LineAddress; 
-        out_msg.Type := DMARequestType:READ;
+      enqueue(reqToDirectory_out, RequestMsg, latency=request_latency) {
+        out_msg.Address := in_msg.PhysicalAddress;
+        out_msg.Type := CoherenceRequestType:DMA_READ;
         out_msg.DataBlk := in_msg.DataBlk;
         out_msg.Len := in_msg.Len;
         out_msg.Destination.add(map_Address_to_Directory(address));
@@ -81,10 +80,9 @@ machine(DMA, "DMA Controller")
 
   action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") {
     peek(dmaRequestQueue_in, SequencerMsg) {
-      enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) {
-          out_msg.PhysicalAddress := in_msg.PhysicalAddress;
-          out_msg.LineAddress := in_msg.LineAddress; 
-          out_msg.Type := DMARequestType:WRITE;
+      enqueue(reqToDirectory_out, RequestMsg, latency=request_latency) {
+          out_msg.Address := in_msg.PhysicalAddress;
+          out_msg.Type := CoherenceRequestType:DMA_WRITE;
           out_msg.DataBlk := in_msg.DataBlk;
           out_msg.Len := in_msg.Len;
           out_msg.Destination.add(map_Address_to_Directory(address));
@@ -94,13 +92,11 @@ machine(DMA, "DMA Controller")
   }
 
   action(a_ackCallback, "a", desc="Notify dma controller that write request completed") {
-    peek (dmaResponseQueue_in, DMAResponseMsg) {
-      dma_sequencer.ackCallback();
-    }
+    dma_sequencer.ackCallback();
   }
 
   action(d_dataCallback, "d", desc="Write data to dma sequencer") {
-    peek (dmaResponseQueue_in, DMAResponseMsg) {
+    peek (dmaResponseQueue_in, ResponseMsg) {
       dma_sequencer.dataCallback(in_msg.DataBlk);
     }
   }
diff --git a/src/mem/protocol/MESI_CMP_directory-mem.sm b/src/mem/protocol/MESI_CMP_directory-mem.sm
index f5a2e431b..7e30883b0 100644
--- a/src/mem/protocol/MESI_CMP_directory-mem.sm
+++ b/src/mem/protocol/MESI_CMP_directory-mem.sm
@@ -40,13 +40,11 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
    int directory_latency
 {
 
-  MessageBuffer requestToDir, network="From", virtual_network="2", ordered="false";
-  MessageBuffer responseToDir, network="From", virtual_network="3", ordered="false";
-  MessageBuffer responseFromDir, network="To", virtual_network="3", ordered="false";
-
-  MessageBuffer dmaRequestFromDir, network="To", virtual_network="6", ordered="true";
-  MessageBuffer dmaRequestToDir, network="From", virtual_network="7", ordered="true";
+  MessageBuffer requestToDir, network="From", virtual_network="0", ordered="false";
+  MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false";
 
+  MessageBuffer requestFromDir, network="To", virtual_network="0", ordered="false";
+  MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false";
 
   // STATES
   enumeration(State, desc="Directory states", default="Directory_State_I") {
@@ -118,9 +116,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
 //  DirectoryMemory directory, constructor_hack="i";
 //  MemoryControl memBuffer, constructor_hack="i";
 
-  DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])';
+  DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory"])';
 
-  MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])';
+  MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_control"])';
 
 
   TBETable TBEs, template_hack="<Directory_TBE>";
@@ -167,32 +165,19 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
   // ** OUT_PORTS **
   out_port(responseNetwork_out, ResponseMsg, responseFromDir);
   out_port(memQueue_out, MemoryMsg, memBuffer);
-  out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaRequestFromDir);
 
   // ** IN_PORTS **
 
-//added by SS for dma
-  in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) {
-    if (dmaRequestQueue_in.isReady()) {
-      peek(dmaRequestQueue_in, DMARequestMsg) {
-        if (in_msg.Type == DMARequestType:READ) {
-          trigger(Event:DMA_READ, in_msg.LineAddress);
-        } else if (in_msg.Type == DMARequestType:WRITE) {
-          trigger(Event:DMA_WRITE, in_msg.LineAddress);
-        } else {
-          error("Invalid message");
-        }
-      }
-    }
-  }
-
-
   in_port(requestNetwork_in, RequestMsg, requestToDir) {
     if (requestNetwork_in.isReady()) {
       peek(requestNetwork_in, RequestMsg) {
         assert(in_msg.Destination.isElement(machineID));
         if (isGETRequest(in_msg.Type)) {
           trigger(Event:Fetch, in_msg.Address);
+        } else if (in_msg.Type == CoherenceRequestType:DMA_READ) {
+          trigger(Event:DMA_READ, makeLineAddress(in_msg.Address));
+        } else if (in_msg.Type == CoherenceRequestType:DMA_WRITE) {
+          trigger(Event:DMA_WRITE, makeLineAddress(in_msg.Address));          
         } else {
           DEBUG_EXPR(in_msg);
           error("Invalid message");
@@ -328,7 +313,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
   }
 //added by SS for dma
   action(qf_queueMemoryFetchRequestDMA, "qfd", desc="Queue off-chip fetch request") {
-    peek(dmaRequestQueue_in, DMARequestMsg) {
+    peek(requestNetwork_in, RequestMsg) {
       enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) {
         out_msg.Address := address;
         out_msg.Type := MemoryRequestType:MEMORY_READ;
@@ -342,14 +327,14 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
   }
 
   action(p_popIncomingDMARequestQueue, "p", desc="Pop incoming DMA queue") {
-    dmaRequestQueue_in.dequeue();
+    requestNetwork_in.dequeue();
   }
 
   action(dr_sendDMAData, "dr", desc="Send Data to DMA controller from directory") {
     peek(memQueue_in, MemoryMsg) {
-      enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) {
-        out_msg.PhysicalAddress := address;
-        out_msg.Type := DMAResponseType:DATA;
+      enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DATA;
         out_msg.DataBlk := in_msg.DataBlk;   // we send the entire data block and rely on the dma controller to split it up if need be
         out_msg.Destination.add(map_Address_to_DMA(address));
         out_msg.MessageSize := MessageSizeType:Response_Data;
@@ -358,15 +343,13 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
   }
 
   action(dw_writeDMAData, "dw", desc="DMA Write data to memory") {
-    peek(dmaRequestQueue_in, DMARequestMsg) {
-      //directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, in_msg.Offset, in_msg.Len);
-
-      directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.PhysicalAddress), in_msg.Len);
+    peek(requestNetwork_in, RequestMsg) {
+      directory[address].DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.Address), in_msg.Len);
     }
   }
 
   action(qw_queueMemoryWBRequest_partial, "qwp", desc="Queue off-chip writeback request") {
-     peek(dmaRequestQueue_in, DMARequestMsg) {
+     peek(requestNetwork_in, RequestMsg) {
       enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) {
         out_msg.Address := address;
         out_msg.Type := MemoryRequestType:MEMORY_WB;
@@ -384,9 +367,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
   }
 
   action(da_sendDMAAck, "da", desc="Send Ack to DMA controller") {
-      enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) {
-        out_msg.PhysicalAddress := address;
-        out_msg.Type := DMAResponseType:ACK;
+      enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:ACK;
         out_msg.Destination.add(map_Address_to_DMA(address));
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
       }
@@ -397,7 +380,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
   }
 
   action(zz_recycleDMAQueue, "zz", desc="recycle DMA queue") {
-    dmaRequestQueue_in.recycle();
+    requestNetwork_in.recycle();
   }
 
 
@@ -410,12 +393,12 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
 
 
   action(inv_sendCacheInvalidate, "inv", desc="Invalidate a cache block") {
-    peek(dmaRequestQueue_in, DMARequestMsg) {
+    peek(requestNetwork_in, RequestMsg) {
       enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:INV;
       out_msg.Sender := machineID;
-      out_msg.Destination := directory[in_msg.PhysicalAddress].Owner;
+      out_msg.Destination := directory[address].Owner;
       out_msg.MessageSize := MessageSizeType:Response_Control;
       }
     }
@@ -424,9 +407,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
 
   action(drp_sendDMAData, "drp", desc="Send Data to DMA controller from incoming PUTX") {
     peek(responseNetwork_in, ResponseMsg) {
-      enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) {
-        out_msg.PhysicalAddress := address;
-        out_msg.Type := DMAResponseType:DATA;
+      enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DATA;
         out_msg.DataBlk := in_msg.DataBlk;   // we send the entire data block and rely on the dma controller to split it up if need be
         out_msg.Destination.add(map_Address_to_DMA(address));
         out_msg.MessageSize := MessageSizeType:Response_Data;
@@ -439,10 +422,10 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
   }
 
   action(v_allocateTBE, "v", desc="Allocate TBE") {
-    peek(dmaRequestQueue_in, DMARequestMsg) {
+    peek(requestNetwork_in, RequestMsg) {
       TBEs.allocate(address);
       TBEs[address].DataBlk := in_msg.DataBlk;
-      TBEs[address].PhysicalAddress := in_msg.PhysicalAddress;
+      TBEs[address].PhysicalAddress := in_msg.Address;
       TBEs[address].Len := in_msg.Len; 
     }
   }
@@ -514,7 +497,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
 //added by SS for dma support
   transition(I, DMA_READ, ID) {
     qf_queueMemoryFetchRequestDMA;
-    p_popIncomingDMARequestQueue;
+    j_popIncomingRequestQueue;
   }
 
   transition(ID, Memory_Data, I) {
@@ -525,7 +508,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
   transition(I, DMA_WRITE, ID_W) {
     dw_writeDMAData;
     qw_queueMemoryWBRequest_partial;
-    p_popIncomingDMARequestQueue;
+    j_popIncomingRequestQueue;
   }
 
   transition(ID_W, Memory_Ack, I) {
@@ -544,7 +527,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
 
   transition(M, DMA_READ, M_DRD) {
     inv_sendCacheInvalidate;
-    p_popIncomingDMARequestQueue;
+    j_popIncomingRequestQueue;
   }
 
   transition(M_DRD, Data, M_DRDI) {
@@ -563,7 +546,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
   transition(M, DMA_WRITE, M_DWR) {
     v_allocateTBE;
     inv_sendCacheInvalidate;
-    p_popIncomingDMARequestQueue;
+    j_popIncomingRequestQueue;
   }
 
   transition(M_DWR, Data, M_DWRI) {
diff --git a/src/mem/protocol/MESI_CMP_directory-msg.sm b/src/mem/protocol/MESI_CMP_directory-msg.sm
index 15934e6b2..74c1e56f0 100644
--- a/src/mem/protocol/MESI_CMP_directory-msg.sm
+++ b/src/mem/protocol/MESI_CMP_directory-msg.sm
@@ -70,7 +70,8 @@ enumeration(CoherenceRequestType, desc="...") {
   WB_NACK,   desc="Writeback neg. ack";
   FWD,      desc="Generic FWD";
 
-
+  DMA_READ, desc="DMA Read";
+  DMA_WRITE, desc="DMA Write";
 }
 
 // CoherenceResponseType
@@ -95,6 +96,7 @@ structure(RequestMsg, desc="...", interface="NetworkMessage") {
   NetDest Destination,          desc="What components receive the request, includes MachineType and num";
   MessageSizeType MessageSize,  desc="size category of the message";
   DataBlock DataBlk,            desc="Data for the cache line (if PUTX)";
+  int Len;
   bool Dirty, default="false",  desc="Dirty bit";
   PrefetchBit Prefetch,         desc="Is this a prefetch request";
 }
@@ -111,68 +113,4 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") {
   MessageSizeType MessageSize,  desc="size category of the message";
 }
 
-enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") {
-  READ,          desc="Memory Read";
-  WRITE,         desc="Memory Write";
-  NULL,          desc="Invalid";
-}
-
-enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") {
-  DATA,          desc="DATA read";
-  ACK,           desc="ACK write";
-  NULL,          desc="Invalid";
-}
-
-structure(DMARequestMsg, desc="...", interface="NetworkMessage") {
-  DMARequestType Type,       desc="Request type (read/write)";
-  Address PhysicalAddress,   desc="Physical address for this request";
-  Address LineAddress,       desc="Line address for this request";
-  NetDest Destination,       desc="Destination";
-  DataBlock DataBlk,         desc="DataBlk attached to this request";
-  int Offset,                desc="The offset into the datablock";
-  int Len,                   desc="The length of the request";
-  MessageSizeType MessageSize, desc="size category of the message";
-}
-
-structure(DMAResponseMsg, desc="...", interface="NetworkMessage") {
-  DMAResponseType Type,      desc="Response type (DATA/ACK)";
-  Address PhysicalAddress,   desc="Physical address for this request";
-  Address LineAddress,       desc="Line address for this request";
-  NetDest Destination,       desc="Destination";
-  DataBlock DataBlk,         desc="DataBlk attached to this request";
-  MessageSizeType MessageSize, desc="size category of the message";
-}
-
-
-
-/*
-GenericRequestType convertToGenericType(CoherenceRequestType type) {
-  if(type == CoherenceRequestType:PUTX) {
-    return GenericRequestType:PUTX;
-  } else if(type == CoherenceRequestType:GETS) {
-    return GenericRequestType:GETS;
-  } else if(type == CoherenceRequestType:GET_INSTR) {
-    return GenericRequestType:GET_INSTR;
-  } else if(type == CoherenceRequestType:GETX) {
-    return GenericRequestType:GETX;
-  } else if(type == CoherenceRequestType:UPGRADE) {
-    return GenericRequestType:UPGRADE;
-  } else if(type == CoherenceRequestType:PUTS) {
-    return GenericRequestType:PUTS;
-  } else if(type == CoherenceRequestType:INV) {
-    return GenericRequestType:INV;
-  } else if(type == CoherenceRequestType:INV_S) {
-    return GenericRequestType:INV_S;
-  } else if(type == CoherenceRequestType:L1_DG) {
-    return GenericRequestType:DOWNGRADE;
-  } else if(type == CoherenceRequestType:WB_ACK) {
-    return GenericRequestType:WB_ACK;
-  } else if(type == CoherenceRequestType:EXE_ACK) {
-    return GenericRequestType:EXE_ACK;
-  } else {
-    DEBUG_EXPR(type);
-    error("invalid CoherenceRequestType");
-  }
-}
-*/
 
diff --git a/src/mem/protocol/MI_example-cache.sm b/src/mem/protocol/MI_example-cache.sm
index 915a0eb99..64771bac5 100644
--- a/src/mem/protocol/MI_example-cache.sm
+++ b/src/mem/protocol/MI_example-cache.sm
@@ -17,6 +17,7 @@ machine(L1Cache, "MI Example L1 Cache")
     II, desc="Not Present/Invalid, issued PUT";
     M,  desc="Modified";
     MI,  desc="Modified, issued PUT";
+    MII, desc="Modified, issued PUTX, received nack";
 
     IS,  desc="Issued request for LOAD/IFETCH";
     IM,  desc="Issued request for STORE/ATOMIC";
@@ -388,6 +389,16 @@ machine(L1Cache, "MI Example L1 Cache")
     o_popForwardedRequestQueue;
   }
 
+  transition(MI, Writeback_Nack, MII) {
+    o_popForwardedRequestQueue;
+  }
+
+  transition(MII, Fwd_GETX, I) {
+    ee_sendDataFromTBE;
+    w_deallocateTBE;
+    o_popForwardedRequestQueue;
+  }
+
   transition(II, Writeback_Nack, I) {
     w_deallocateTBE;
     o_popForwardedRequestQueue;
diff --git a/src/mem/protocol/MI_example-dir.sm b/src/mem/protocol/MI_example-dir.sm
index 0061a2838..1f64d25df 100644
--- a/src/mem/protocol/MI_example-dir.sm
+++ b/src/mem/protocol/MI_example-dir.sm
@@ -83,9 +83,9 @@ machine(Directory, "Directory protocol")
   }
 
   // ** OBJECTS **
-  DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])';
+  DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory"])';
 
-  MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])';
+  MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_control"])';
 
   TBETable TBEs, template_hack="<Directory_TBE>";
 
diff --git a/src/mem/protocol/MOESI_CMP_directory-dir.sm b/src/mem/protocol/MOESI_CMP_directory-dir.sm
index 8e48fc9ab..9e6cc918d 100644
--- a/src/mem/protocol/MOESI_CMP_directory-dir.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-dir.sm
@@ -127,8 +127,8 @@ machine(Directory, "Directory protocol")
 
   // ** OBJECTS **
 
-  DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])';
-  MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])';
+  DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory"])';
+  MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_control"])';
   TBETable TBEs, template_hack="<Directory_TBE>";
 
   State getState(Address addr) {
diff --git a/src/mem/protocol/RubySlicc_ComponentMapping.sm b/src/mem/protocol/RubySlicc_ComponentMapping.sm
index 2a027554e..891820c46 100644
--- a/src/mem/protocol/RubySlicc_ComponentMapping.sm
+++ b/src/mem/protocol/RubySlicc_ComponentMapping.sm
@@ -33,6 +33,7 @@ int getNumberOfLastLevelCaches();
 
 // NodeID map_address_to_node(Address addr);
 MachineID mapAddressToRange(Address addr, MachineType type, int low, int high);
+NetDest broadcast(MachineType type);
 MachineID map_Address_to_DMA(Address addr);
 MachineID map_Address_to_Directory(Address addr);
 NodeID map_Address_to_DirectoryNode(Address addr);
diff --git a/src/mem/protocol/SConsopts b/src/mem/protocol/SConsopts
index 7be9fd97e..10a303681 100644
--- a/src/mem/protocol/SConsopts
+++ b/src/mem/protocol/SConsopts
@@ -50,7 +50,7 @@ all_protocols = [
     'MOESI_hammer',
     ]
 
-opt = EnumVariable('PROTOCOL', 'Coherence Protocol for Ruby', 'MI_example',
+opt = EnumVariable('PROTOCOL', 'Coherence Protocol for Ruby', 'MOESI_CMP_directory',
                    all_protocols)
 
 sticky_vars.AddVariables(opt)
diff --git a/src/mem/ruby/config/MESI_CMP_directory.rb b/src/mem/ruby/config/MESI_CMP_directory.rb
index 4d9ff30b3..7a9d47f24 100644
--- a/src/mem/ruby/config/MESI_CMP_directory.rb
+++ b/src/mem/ruby/config/MESI_CMP_directory.rb
@@ -12,8 +12,8 @@ class MESI_CMP_directory_L2CacheController < CacheController
   def argv()
     vec = super()
     vec += " cache " + cache.obj_name
-    vec += " l2_request_latency "+l2_request_latency.to_s 
-    vec += " l2_response_latency "+l2_response_latency.to_s
+    vec += " l2_request_latency "+request_latency.to_s 
+    vec += " l2_response_latency "+response_latency.to_s
     vec += " to_l1_latency "+to_L1_latency.to_s
     return vec
   end
diff --git a/src/mem/ruby/config/MI_example-homogeneous.rb b/src/mem/ruby/config/MI_example-homogeneous.rb
index 1ed81ee42..71e20c318 100644
--- a/src/mem/ruby/config/MI_example-homogeneous.rb
+++ b/src/mem/ruby/config/MI_example-homogeneous.rb
@@ -34,6 +34,13 @@ for i in 0..$*.size-1 do
   elsif $*[i] == "-m"
     num_memories = $*[i+1].to_i
     i = i+1
+  elsif $*[i] == "-R"
+    if $*[i+1] == "rand"
+      RubySystem.random_seed = "rand"
+    else
+      RubySystem.random_seed = $*[i+1].to_i
+    end
+    i = i+ 1
   elsif $*[i] == "-s"
     memory_size_mb = $*[i+1].to_i
     i = i + 1
diff --git a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
index 566055f74..a8ef1eceb 100644
--- a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
+++ b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
@@ -12,13 +12,13 @@ RubySystem.reset
 # default values
 
 num_cores = 2
-l1_icache_size_bytes = 32768
+l1_icache_size_kb = 64
 l1_icache_assoc = 8
 l1_icache_latency = 1
-l1_dcache_size_bytes = 32768
+l1_dcache_size_kb = 32
 l1_dcache_assoc = 8
 l1_dcache_latency = 1
-l2_cache_size_bytes = 2048 # total size (sum of all banks)
+l2_cache_size_kb = 8192 # total size (sum of all banks)
 l2_cache_assoc = 16
 l2_cache_latency = 12
 num_l2_banks = num_cores
@@ -26,7 +26,8 @@ num_memories = 1
 memory_size_mb = 1024
 num_dma = 1
 
-protocol = "MOESI_CMP_token"
+#default protocol 
+protocol = "MOESI_CMP_directory"
 
 # check for overrides
 
@@ -34,59 +35,50 @@ for i in 0..$*.size-1 do
   if $*[i] == "-c" or $*[i] == "--protocol"
     i += 1
     protocol = $*[i]
+  elsif $*[i] == "-A"
+    l1_dcache_size_kb = $*[i+1].to_i
+    i = i+1
+  elsif $*[i] == "-B"
+    num_l2_banks = $*[i+1].to_i
+    i = i+1
   elsif $*[i] == "-m"
     num_memories = $*[i+1].to_i
     i = i+1
   elsif $*[i] == "-p"
     num_cores = $*[i+1].to_i
     i = i+1
+  elsif $*[i] == "-R"
+    if $*[i+1] == "rand"
+      RubySystem.random_seed = "rand"
+    else
+      RubySystem.random_seed = $*[i+1].to_i
+    end
+    i = i+ 1
   elsif $*[i] == "-s"
     memory_size_mb = $*[i+1].to_i
     i = i + 1
-  elsif $*[i] == "-C"
-    l1_dcache_size_bytes = $*[i+1].to_i
-    i = i + 1
-  elsif $*[i] == "-A"
-    l1_dcache_assoc = $*[i+1].to_i
-    i = i + 1
-  elsif $*[i] == "-D"
-    num_dma = $*[i+1].to_i
-    i = i + 1
   end
 end
 
-n_tokens = num_cores + 1
-
 net_ports = Array.new
 iface_ports = Array.new
 
-#assert(protocol == "MESI_CMP_directory", __FILE__+" cannot be used with protocol "+protocol);
+assert((protocol == "MESI_CMP_directory" or protocol == "MOESI_CMP_directory"), __FILE__+" cannot be used with protocol '#{protocol}'");
 
 require protocol+".rb"
 
 num_cores.times { |n|
-  icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_bytes, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
-  dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_bytes, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
+  icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
+  dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
   sequencer = Sequencer.new("Sequencer_"+n.to_s, icache, dcache)
   iface_ports << sequencer
-  if protocol == "MOESI_CMP_token"
-    net_ports << MOESI_CMP_token_L1CacheController.new("L1CacheController_"+n.to_s,
-                                                       "L1Cache",
-                                                       icache, dcache,
-                                                       sequencer,
-                                                       num_l2_banks,
-                                                       n_tokens)
-  end
-
   if protocol == "MOESI_CMP_directory"
     net_ports << MOESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s,
                                                            "L1Cache",
                                                            icache, dcache,
                                                            sequencer,
                                                            num_l2_banks)
-  end
-
-  if protocol == "MESI_CMP_directory"
+  elsif protocol == "MESI_CMP_directory"
     net_ports << MESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s,
                                                            "L1Cache",
                                                            icache, dcache,
@@ -95,47 +87,29 @@ num_cores.times { |n|
   end
 }
 num_l2_banks.times { |n|
-  cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_bytes/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
-  if protocol == "MOESI_CMP_token"
-    net_ports << MOESI_CMP_token_L2CacheController.new("L2CacheController_"+n.to_s,
-                                                       "L2Cache",
-                                                       cache,
-                                                       n_tokens)
-  end
-
+  cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_kb/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
   if protocol == "MOESI_CMP_directory"
     net_ports << MOESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
                                                            "L2Cache",
                                                            cache)
-  end
-
-  if protocol == "MESI_CMP_directory"
+  elsif protocol == "MESI_CMP_directory"
     net_ports << MESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
                                                            "L2Cache",
                                                            cache)
   end
 
-
+  net_ports.last.request_latency = l2_cache_latency + 2
+  net_ports.last.response_latency = l2_cache_latency + 2
 }
 num_memories.times { |n|
   directory = DirectoryMemory.new("DirectoryMemory_"+n.to_s, memory_size_mb/num_memories)
   memory_control = MemoryControl.new("MemoryControl_"+n.to_s)
-  if protocol == "MOESI_CMP_token"
-    net_ports << MOESI_CMP_token_DirectoryController.new("DirectoryController_"+n.to_s,
-                                                         "Directory",
-                                                         directory, 
-                                                         memory_control,
-                                                         num_l2_banks)
-  end
-
   if protocol == "MOESI_CMP_directory"
     net_ports << MOESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s,
                                                              "Directory",
                                                              directory, 
                                                              memory_control)
-  end
-
-  if protocol == "MESI_CMP_directory"
+  elsif protocol == "MESI_CMP_directory"
     net_ports << MESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s,
                                                              "Directory",
                                                              directory,
@@ -146,19 +120,11 @@ num_memories.times { |n|
 num_dma.times { |n|
   dma_sequencer = DMASequencer.new("DMASequencer_"+n.to_s)
   iface_ports << dma_sequencer
-  if protocol == "MOESI_CMP_token"
-    net_ports << MOESI_CMP_token_DMAController.new("DMAController_"+n.to_s,
-                                                   "DMA",
-                                                   dma_sequencer)
-  end
-
   if protocol == "MOESI_CMP_directory"
     net_ports << MOESI_CMP_directory_DMAController.new("DMAController_"+n.to_s,
                                                        "DMA",
                                                        dma_sequencer)
-  end
-
-  if protocol == "MESI_CMP_directory"
+  elsif protocol == "MESI_CMP_directory"
     net_ports << MESI_CMP_directory_DMAController.new("DMAController_"+n.to_s,
                                                        "DMA",
                                                        dma_sequencer)
diff --git a/src/mem/ruby/config/assert.rb b/src/mem/ruby/config/assert.rb
new file mode 100644
index 000000000..cc3e43214
--- /dev/null
+++ b/src/mem/ruby/config/assert.rb
@@ -0,0 +1,18 @@
+#!/usr/bin/env ruby
+# Assertion helpers for the Ruby configuration scripts; cfg.rb requires this file.
+class AssertionFailure < RuntimeError
+  attr_reader :msg, :output # the raw message and the optional +out+ argument
+  def initialize(message, out=nil)
+    @msg = message
+    @output = out
+  end
+end
+# NOTE(review): derives from Exception, so a bare `rescue` will not catch it.
+class NotImplementedException < Exception
+end
+# Raises AssertionFailure (formatted with +message+) unless +condition+ is truthy.
+def assert(condition,message)
+  unless condition
+    raise AssertionFailure.new(message), "\n\nAssertion failed: \n\n    #{message}\n\n"
+  end
+end
diff --git a/src/mem/ruby/config/cfg.rb b/src/mem/ruby/config/cfg.rb
index c470ca92f..d57862420 100644
--- a/src/mem/ruby/config/cfg.rb
+++ b/src/mem/ruby/config/cfg.rb
@@ -1,7 +1,7 @@
 #!/usr/bin/ruby
 
-class AssertionFailure < RuntimeError
-end
+root = File.dirname(File.expand_path(__FILE__))
+require root+'/assert.rb'
 
 class Boolean
   def self.is_a?(obj)
@@ -9,22 +9,46 @@ class Boolean
   end
 end
 
-def assert(condition,message)
-  unless condition
-    raise AssertionFailure, "\n\nAssertion failed: \n\n   #{message}\n\n"
-  end
-end
-
 class LibRubyObject
   @@all_objs = Array.new
-  attr_reader :obj_name
   @@default_params = Hash.new
+  @@param_types = Hash.new
+
+  attr_reader :obj_name
 
   def initialize(obj_name)
     assert obj_name.is_a?(String), "Obj_Name must be a string"
     @obj_name = obj_name
     @@all_objs << self
     @params = Hash.new
+    # Seed @params and install accessors for every parameter registered by this
+    # class or any of its ancestors, skipping names the object already responds to.
+    self.class.ancestors.each { |ancestor|
+      if @@default_params.key?(ancestor.name.to_sym)
+        @@default_params[ancestor.name.to_sym].each { |p, default|
+          p = p.to_sym
+          @params[p] = default # registered default; may be nil
+          if ! respond_to?(p)
+            self.class.send(:define_method, p) { # getter: falls back to the registered default
+              @params[p] = @@default_params[ancestor.name.to_sym][p] if ! @params.key?(p)
+              return @params[p]
+            }
+          end
+          setter_method_name = (p.to_s + "=").to_sym
+          if ! respond_to?(setter_method_name)
+            self.class.send(:define_method, setter_method_name) { |val| # setter: asserts val matches the declared type
+              type = @@param_types[ancestor.name.to_sym][p]
+              if val.is_a?(FalseClass) || val.is_a?(TrueClass)
+                assert type.is_a?(Boolean), "default value of param \"#{p}\" must be either true or false"
+              else
+                assert val.is_a?(type), "default value of param \"#{p}\", which is of type #{val.class.name} does not match expected type #{type}"
+              end
+              @params[p] = val
+            }
+          end
+        }
+      end
+    }
   end
 
   def cppClassName()
@@ -35,40 +59,24 @@ class LibRubyObject
     idx = self.name.to_sym
     @@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
     @@default_params[idx][param_name] = nil
-    send :define_method, param_name do
-      @params[param_name] = @@default_params[idx][param_name] if ! @params.key?(param_name)
-      @params[param_name]
-    end
-    method_name = (param_name.to_s + "=").to_sym
-    send :define_method, method_name do |val|
-      if val.is_a?(FalseClass) || val.is_a?(TrueClass)
-        assert type.is_a?(Boolean), "default value of param \"#{param_name}\" must be either true or false"
-      else
-        assert val.is_a?(type), "default value of param \"#{param_name}\" does not match type #{type}"
-      end
-#      assert val.is_a?(type), "#{param_name} must be of type #{type}"
-      @params[param_name] = val
-    end
+    @@param_types[idx] = Hash.new if ! @@param_types.key?(idx)
+    @@param_types[idx][param_name] = type
   end
 
   def self.default_param(param_name, type, default)
-    idx = self.name.to_sym
-    @@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
+
     if default.is_a?(FalseClass) || default.is_a?(TrueClass)
       assert type.is_a?(Boolean), "default value of param \"#{param_name}\" must be either true or false"
     else
       assert default.is_a?(type), "default value of param \"#{param_name}\" does not match type #{type}"
     end
+
+    idx = self.name.to_sym
+    @@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
     @@default_params[idx][param_name] = default
-    send :define_method, param_name do
-      @params[param_name] = @@default_params[idx][param_name] if ! @params.key?(param_name)
-      @params[param_name]
-    end
-    method_name = (param_name.to_s + "=").to_sym
-    send :define_method, method_name do |val|
-      assert val.is_a?(type), "#{param_name} must be of type #{type}"
-      @params[param_name] = val
-    end
+    @@param_types[idx] = Hash.new if ! @@param_types.key?(idx)
+    @@param_types[idx][param_name] = type
+
   end
 
   def applyDefaults()
@@ -86,6 +94,7 @@ class LibRubyObject
 
     @params.each { |key, val|
       str += key.id2name + " "
+      assert(val != nil, "parameter #{key} is nil")
       if val.is_a?(LibRubyObject)
         str += val.obj_name + " "
       else
@@ -123,36 +132,32 @@ end
 
 class NetPort < LibRubyObject
   attr :mach_type
-  attr_reader :version
+  param :version, Integer # assigned in initialize from the per-mach_type counter
 
   @@type_cnt = Hash.new
-  @type_id
   def initialize(obj_name, mach_type)
     super(obj_name)
     @mach_type = mach_type
     @@type_cnt[mach_type] ||= 0
-    @type_id = @@type_cnt[mach_type]
+    self.version= @@type_cnt[mach_type] # version = number of ports of this mach_type created so far
+
     @@type_cnt[mach_type] += 1
 
-    idx = "NetPort".to_sym
-    @@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
-    @@default_params[idx].each { |key, val|
-      @params[key] = val if ! @params.key?(key)
-    }
   end
 
   def port_name
     mach_type
   end
   def port_num
-    @type_id
-  end
-  def cppClassName
-    "NetPort"
+    version # the :version parameter doubles as the port number
   end
   def self.totalOfType(mach_type)
     return @@type_cnt[mach_type]
   end
+  def cppClassName()
+    "generated:"+@mach_type
+  end
+
 end
 
 class MemoryVector < LibRubyObject
@@ -161,7 +166,7 @@ class MemoryVector < LibRubyObject
   end
 
   def cppClassName
-    "MemoryController"
+    "MemoryVector"
   end
 end
 
@@ -296,37 +301,13 @@ private
 
 end
 
-
-
-
 class CacheController < NetPort
-  @@total_cache_controllers = Hash.new
 
   def initialize(obj_name, mach_type, caches)
     super(obj_name, mach_type)
     caches.each { |cache|
       cache.controller = self
     }
-
-    if !@@total_cache_controllers.key?(mach_type)
-      @@total_cache_controllers[mach_type] = 0
-    end
-    @version = @@total_cache_controllers[mach_type]
-    @@total_cache_controllers[mach_type] += 1
-    
-    # call inhereted parameters
-    transitions_per_cycle
-    buffer_size
-    number_of_TBEs
-    recycle_latency
-  end
-
-  def argv()
-    vec = "version "+@version.to_s
-    vec += " transitions_per_cycle "+@params[:transitions_per_cycle].to_s
-    vec += " buffer_size "+@params[:buffer_size].to_s
-    vec += " number_of_TBEs "+@params[:number_of_TBEs].to_s
-    vec += " recycle_latency "+@params[:recycle_latency].to_s
   end
 
   def cppClassName()
@@ -334,164 +315,161 @@ class CacheController < NetPort
   end
 end
 
+class Sequencer < IfacePort
+end
+
 class L1CacheController < CacheController
-  attr :sequencer
+  param :sequencer, Sequencer
 
   def initialize(obj_name, mach_type, caches, sequencer)
     super(obj_name, mach_type, caches)
 
-    @sequencer = sequencer
-    @sequencer.controller = self
-    @sequencer.version = @version
+    sequencer.controller = self
+    sequencer.version = version
+    self.sequencer= sequencer
   end
 
-  def argv()
-    vec = super()
-    vec += " sequencer "+@sequencer.obj_name
-  end
+#  def argv()
+#    vec = super()
+#    vec += " sequencer "+@sequencer.obj_name
+#  end
+end
+
+class DirectoryMemory < LibRubyObject
+end
+class MemoryControl < LibRubyObject
 end
 
 class DirectoryController < NetPort
   @@total_directory_controllers = 0
-  attr :directory
-  attr :memory_control
+  param :directory, DirectoryMemory
+  param :memory_control, MemoryControl
 
   def initialize(obj_name, mach_type, directory, memory_control)
     super(obj_name, mach_type)
 
-    @directory = directory
     directory.controller = self
-
-    @memory_control = memory_control
+    directory.version = @@total_directory_controllers # directory memory gets the running directory-controller count
+    self.directory = directory
+    self.memory_control = memory_control # NOTE(review): @version below is a plain ivar; the :version param accessor never reads it - confirm it is still needed
 
     @version = @@total_directory_controllers
     @@total_directory_controllers += 1
     buffer_size()
   end
 
-  def argv()
-    "version "+@version.to_s+" directory_name "+@directory.obj_name+" transitions_per_cycle "+@params[:transitions_per_cycle].to_s + " buffer_size "+@params[:buffer_size].to_s + " number_of_TBEs "+@params[:number_of_TBEs].to_s + " memory_controller_name "+@memory_control.obj_name + " recycle_latency "+@params[:recycle_latency].to_s
-  end
-
   def cppClassName()
     "generated:"+@mach_type
   end
 
 end
 
+class DMASequencer < IfacePort
+end
+
 class DMAController < NetPort
   @@total_dma_controllers = 0
-  attr :dma_sequencer
+  param :dma_sequencer, DMASequencer
+  param :version, Integer
+
   def initialize(obj_name, mach_type, dma_sequencer)
     super(obj_name, mach_type)
-    @dma_sequencer = dma_sequencer
-    @version = @@total_dma_controllers
-    @@total_dma_controllers += 1
     dma_sequencer.controller = self
-    buffer_size
-  end
+    dma_sequencer.version = @@total_dma_controllers
+    self.dma_sequencer = dma_sequencer
 
-  def argv()
-    "version "+@version.to_s+" dma_sequencer "+@dma_sequencer.obj_name+" transitions_per_cycle "+@params[:transitions_per_cycle].to_s + " buffer_size "+@params[:buffer_size].to_s + " number_of_TBEs "+@params[:number_of_TBEs].to_s +  " recycle_latency "+@params[:recycle_latency].to_s
+    self.version = @@total_dma_controllers
+    @@total_dma_controllers += 1
   end
 
-  def cppClassName()
-    "generated:"+@mach_type
-  end
 end
 
 class Cache < LibRubyObject
-  attr :size, :latency
-  attr_writer :controller
-  def initialize(obj_name, size, latency)
+  param :size_kb, Integer
+  param :latency, Integer
+  param :controller, NetPort
+  def initialize(obj_name, size_kb, latency)
     super(obj_name)
-    assert size.is_a?(Integer), "Cache size must be an integer"
-    @size = size
-    @latency = latency
+    self.size_kb = size_kb
+    self.latency = latency
+    # controller must be set manually by the configuration script
+    # because there is a cyclic dependence
   end
 
-  def args
-    "controller "+@controller.obj_name+" size "+@size.to_s+" latency "+@latency.to_s
-  end
 end
 
 class SetAssociativeCache < Cache
-  attr :assoc, :replacement_policy
+  param :assoc, Integer
+  param :replacement_policy, String
 
   # latency can be either an integer, a float, or the string "auto"
   #  when an integer, it represents the number of cycles for a hit
   #  when a float, it represents the cache access time in ns
   #  when set to "auto", libruby will attempt to find a realistic latency by running CACTI
-  def initialize(obj_name, size, latency, assoc, replacement_policy)
-    super(obj_name, size, latency)
-    @assoc = assoc
-    @replacement_policy = replacement_policy
+  def initialize(obj_name, size_kb, latency, assoc, replacement_policy)
+    super(obj_name, size_kb, latency)
+    self.assoc = assoc
+    self.replacement_policy = replacement_policy
   end
 
   def calculateLatency()
-    if @latency == "auto"
+    if self.latency == "auto"
       cacti_args = Array.new()
-      cacti_args << (@size) <<  RubySystem.block_size_bytes << @assoc
+      cacti_args << (self.size_kb*1024) <<  RubySystem.block_size_bytes << self.assoc
       cacti_args << 1 << 0 << 0 << 0 << 1
       cacti_args << RubySystem.tech_nm << RubySystem.block_size_bytes*8
       cacti_args << 0 << 0 << 0 << 1 << 0 << 0 << 0 << 0 << 1
       cacti_args << 360 << 0 << 0 << 0 << 0 << 1 << 1 << 1 << 1 << 0 << 0
       cacti_args << 50 << 10 << 10 << 0 << 1 << 1
 
-#      cacti_cmd = File.dirname(__FILE__) + "/cacti/cacti " + cacti_args.join(" ")
-
-#      IO.popen(cacti_cmd) { |pipe|
-#        str1 = pipe.readline
-#        str2 = pipe.readline
-#        results = str2.split(", ")
-#        if results.size != 61
-#          print "CACTI ERROR: CACTI produced unexpected output.\n"
-#          print "Are you using the version shipped with libruby?\n"
-#          raise Exception
-#        end
-#        latency_ns = results[5].to_f
-#        if (latency_ns == "1e+39")
-#          print "CACTI ERROR:  CACTI was unable to realistically model the cache ",@obj_name,"\n"
-#          print "Either change the cache parameters or manually set the latency values\n"
-#          raise Exception
-#        end
-#        clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6))
-#        latency_cycles = (latency_ns / clk_period_ns).ceil
-#        @latency = latency_cycles
-#      }
-    elsif @latency.is_a?(Float)
+      cacti_cmd = File.dirname(__FILE__) + "/cacti/cacti " + cacti_args.join(" ")
+
+      IO.popen(cacti_cmd) { |pipe|
+        str1 = pipe.readline
+        str2 = pipe.readline
+        results = str2.split(", ")
+        if results.size != 61
+          print "CACTI ERROR: CACTI produced unexpected output.\n"
+          print "Are you using the version shipped with libruby?\n"
+          raise Exception
+        end
+        latency_ns = results[5].to_f
+        if (latency_ns == 1e+39) # numeric compare: latency_ns is a Float and can never equal the String "1e+39"
+          print "CACTI ERROR:  CACTI was unable to realistically model the cache ",@obj_name,"\n"
+          print "Either change the cache parameters or manually set the latency values\n"
+          raise Exception
+        end
+        clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6)) # clock period in ns derived from freq_mhz
+        latency_cycles = (latency_ns / clk_period_ns).ceil
+        self.latency = latency_cycles
+      }
+    elsif self.latency.is_a?(Float)
       clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6))
-      latency_cycles = (@latency / clk_period_ns).ceil
-      @latency = latency_cycles
-    elsif ! @latency.is_a?(Integer)
+      latency_cycles = (self.latency / clk_period_ns).ceil
+      self.latency = latency_cycles
+    elsif ! self.latency.is_a?(Integer)
       raise Exception
     end
   end
 
-  def argv()
-    args+" assoc "+@assoc.to_s+" replacement_policy "+@replacement_policy
-  end
-
   def cppClassName()
     "SetAssociativeCache"
   end
 end
 
 class DirectoryMemory < LibRubyObject
-  attr :size_mb
-  attr_writer :controller
+  param :size_mb, Integer
+  param :controller, NetPort
+  param :version, Integer
+
   @@total_size_mb = 0
 
   def initialize(obj_name, size_mb)
     super(obj_name)
-    @size_mb = size_mb
+    self.size_mb = size_mb
     @@total_size_mb += size_mb
   end
 
-  def argv()
-    "version "+@controller.version.to_s+" size_mb "+@size_mb.to_s+" controller "+@controller.obj_name
-  end
-
   def cppClassName()
     "DirectoryMemory"
   end
@@ -501,43 +479,17 @@ class DirectoryMemory < LibRubyObject
   end
 end
 
-#added by SS
 class MemoryControl < LibRubyObject
-  attr :name
   def initialize(obj_name)
     super(obj_name)
-    @name = obj_name
   end
 
-  def argv()
-    vec = super()
-    vec += " mem_bus_cycle_multiplier "+mem_bus_cycle_multiplier.to_s
-    vec += " banks_per_rank "+banks_per_rank.to_s
-    vec += " ranks_per_dimm "+ranks_per_dimm.to_s
-    vec += " dimms_per_channel "+dimms_per_channel.to_s
-    vec += " bank_bit_0 "+bank_bit_0.to_s
-    vec += " rank_bit_0 "+rank_bit_0.to_s
-    vec += " dimm_bit_0 "+dimm_bit_0.to_s
-    vec += " bank_queue_size "+bank_queue_size.to_s
-    vec += " bank_busy_time "+bank_busy_time.to_s
-    vec += " rank_rank_delay "+rank_rank_delay.to_s
-    vec += " read_write_delay "+read_write_delay.to_s
-    vec += " basic_bus_busy_time "+basic_bus_busy_time.to_s
-    vec += " mem_ctl_latency "+mem_ctl_latency.to_s
-    vec += " refresh_period "+refresh_period.to_s
-    vec += " tFaw "+tFaw.to_s
-    vec += " mem_random_arbitrate "+mem_random_arbitrate.to_s
-    vec += " mem_fixed_delay "+mem_fixed_delay.to_s
-    vec += " memory_controller_name "+@name
-
-  end
-
-
   def cppClassName()
     "MemoryControl"
   end
 end
 
+
 class Sequencer < IfacePort
 
   def cppClassName()
@@ -564,17 +516,11 @@ end
 
 
 class DMASequencer < IfacePort
+  param :controller, NetPort
+  param :version, Integer
+
   def initialize(obj_name)
     super(obj_name)
-    @params = {
-      :controller => nil,
-      :version => nil
-    }
-  end
-
-  def controller=(controller)
-    @params[:controller] = controller.obj_name
-    @params[:version] = controller.version
   end
 
   def cppClassName()
@@ -582,7 +528,7 @@ class DMASequencer < IfacePort
   end
 
   def bochsConnType()
-    return "dma"+@params[:version].to_s
+    return "dma"+self.version.to_s
   end
 end
 
@@ -613,22 +559,8 @@ class Network < LibRubyObject
   param :topology, Topology
   def initialize(name, topo)
     super(name)
-    @params[:topology] = topo
     topo.network= self
-  end
-
-  def argv()
-    vec = super()
-
-    vec += " endpoint_bandwidth "+endpoint_bandwidth.to_s
-    vec += " adaptive_routing "+adaptive_routing.to_s
-    vec += " number_of_virtual_networks "+number_of_virtual_networks.to_s
-    vec += " fan_out_degree "+fan_out_degree.to_s
-
-    vec += " buffer_size "+buffer_size.to_s
-    vec += " link_latency "+adaptive_routing.to_s
-    vec += " on_chip_latency "+on_chip_latency.to_s
-    vec += " control_msg_size "+control_msg_size.to_s
+    self.topology = topo
   end
 
   def printTopology()
@@ -689,7 +621,6 @@ class CrossbarTopology < Topology
   end
 end
 
-#added by SS
 class Tracer < LibRubyObject
   def initialize(obj_name)
     super(obj_name)
@@ -712,20 +643,10 @@ class Profiler < LibRubyObject
 
 end
 
-#added by SS
 class GarnetNetwork < Network
   def initialize(name, topo)
     super(name, topo)
   end
-  def argv()
-    vec = super()
-    vec += " flit_size "+flit_size.to_s
-    vec += " number_of_pipe_stages "+number_of_pipe_stages.to_s
-    vec += " vcs_per_class "+vcs_per_class.to_s
-    vec += " buffer_size "+buffer_size.to_s
-    vec += " using_network_testing "+using_network_testing.to_s
-  end
-
 end
 
 class GarnetFixedPipeline < GarnetNetwork
@@ -733,10 +654,6 @@ class GarnetFixedPipeline < GarnetNetwork
     super(name, net_ports)
   end
 
-  def argv()
-    super()
-  end
-
   def cppClassName()
     "GarnetNetwork_d"
   end
@@ -747,14 +664,9 @@ class GarnetFlexiblePipeline < GarnetNetwork
     super(name, net_ports)
   end
 
-  def argv()
-    super()
-  end
-
   def cppClassName()
     "GarnetNetwork"
   end
 end
 
-#added by SS
 require "defaults.rb"
diff --git a/src/mem/ruby/config/config.hh b/src/mem/ruby/config/config.hh
deleted file mode 100644
index ad91cd73d..000000000
--- a/src/mem/ruby/config/config.hh
+++ /dev/null
@@ -1,236 +0,0 @@
-
-// FOR MOESI_CMP_token
-//PARAM_BOOL(  FilteringEnabled, false, false );
-//PARAM_BOOL(  DistributedPersistentEnabled, true, false );
-//PARAM_BOOL(  DynamicTimeoutEnabled, true, false );
-//PARAM( RetryThreshold, 1, false );
-//PARAM( FixedTimeoutLatency, 300, false );
-
-//PARAM( TraceWarmupLength, 1000000, false );
-
-//PARAM( callback_counter, 0, false );
-//PARAM( NUM_COMPLETIONS_BEFORE_PASS, 0, false );
-
-//PARAM( tester_length, 0, false );
-//PARAM( synthetic_locks, 2048, false );
-//PARAM( think_time, 5, false );
-//PARAM( wait_time, 5, false );
-//PARAM( hold_time, 5, false );
-//PARAM( deterministic_addrs, 1, false );
-//PARAM_STRING( SpecifiedGenerator, "DetermInvGenerator", false );
-
-// For debugging purposes, one can enable a trace of all the protocol
-// state machine changes. Unfortunately, the code to generate the
-// trace is protocol specific. To enable the code for some of the
-// standard protocols,
-//   1. change "PROTOCOL_DEBUG_TRACE = true"
-//   2. enable debug in Makefile
-//   3. use the "--start 1" command line parameter or
-//      "g_debug_ptr->setDebugTime(1)" to beging the following to set the
-//      debug begin time
-//
-// this use to be ruby/common/Global.hh
-
-//PARAM_BOOL(  ProtocolDebugTrace, true, false );
-// a string for filtering debugging output (for all g_debug vars see Debug.hh)
-//PARAM_STRING( DEBUG_FILTER_STRING, "", false );
-// filters debugging messages based on priority (low, med, high)
-//PARAM_STRING( DEBUG_VERBOSITY_STRING, "", false );
-// filters debugging messages based on a ruby time
-//PARAM_ULONG( DEBUG_START_TIME, 0, false );
-// sends debugging messages to a output filename
-//PARAM_STRING( DEBUG_OUTPUT_FILENAME, "", false );
-
-//PARAM_BOOL( ProfileHotLines, false, false );
-
-// PROFILE_ALL_INSTRUCTIONS is used if you want Ruby to profile all instructions executed
-// The following need to be true for this to work correctly:
-// 1. Disable istc and dstc for this simulation run
-// 2. Add the following line to the object "sim" in the checkpoint you run from:
-//      instruction_profile_line_size: 4
-// This is used to have simics report back all instruction requests
-
-// For more details on how to find out how to interpret the output physical instruction
-// address, please read the document in the simics-howto directory
-//PARAM_BOOL( ProfileAllInstructions, false, false );
-
-// Set the following variable to true if you want a complete trace of
-// PCs (physical address of program counters, with executing processor IDs)
-// to be printed to stdout. Make sure to direct the simics output to a file.
-// Otherwise, the run will take a really long time!
-// A long run may write a file that can exceed the OS limit on file length
-//PARAM_BOOL( PRINT_INSTRUCTION_TRACE, false, false );
-//PARAM( DEBUG_CYCLE, 0, false );
-
-// Make the entire memory system perfect
-//PARAM_BOOL( PERFECT_MEMORY_SYSTEM, false, false );
-//PARAM( PERFECT_MEMORY_SYSTEM_LATENCY, 0, false );
-
-// *********************************************
-// SYSTEM PARAMETERS
-// *********************************************
-
-//PARAM( NumberOfChips, 1, false );
-//PARAM( NumberOfCores, 2, false );
-//PARAM_ARRAY( NumberOfCoresPerChip, int, m_NumberOfChips, 2, false);
-
-// *********************************************
-// CACHE PARAMETERS
-// *********************************************
-
-//PARAM( NumberOfCaches, m_NumberOfCores, false );
-//PARAM( NumberOfCacheLevels, 1, false );
-/* this returns the number of discrete CacheMemories per level (i.e. a split L1 counts for 2) */
-//PARAM_ARRAY( NumberOfCachesPerLevel, int, m_NumberOfCacheLevels, m_NumberOfCores, false );   // this is the number of discrete caches if the level is private
-                                                                                             // or the number of banks if the level is shared
-//PARAM( CacheIDFromParams, 1, true );                                                         // returns a unique CacheID from the parameters (level, num, split_type)
-//PARAM_ARRAY( CacheLatency, int, m_NumberOfCaches, 1, false );                                // returns the latency for cache, indexed by CacheID
-//PARAM_ARRAY( CacheSplitType, string, m_NumberOfCaches, "unified", false );                   // returns "data", "instruction", or "unified", indexed by CacheID
-//PARAM_ARRAY( CacheType, string, m_NumberOfCaches, "SetAssociative", false );                 // returns the type of a cache, indexed by CacheID
-//PARAM_ARRAY( CacheAssoc, int, m_NumberOfCaches, 4, false );                                  // returns the cache associativity, indexed by CacheID
-//PARAM_ARRAY( NumberOfCacheSets, int, m_NumberOfCaches, 256, false );                         // returns the number of cache sets, indexed by CacheID
-//PARAM_ARRAY( NumberOfCacheSetBits, int, m_NumberOfCaches, log_int(256), false );             // returns the number of cache set bits, indexed by CacheID
-//PARAM_ARRAY( CacheReplacementPolicy, string, m_NumberOfCaches, "PSEUDO_LRU", false );        // other option is "LRU"
-
-//PARAM( DataBlockBytes, 64, false );
-//PARAM( DataBlockBits, log_int(m_DataBlockBytes), false);
-
-// ********************************************
-// MEMORY PARAMETERS
-// ********************************************
-
-//PARAM_ARRAY( NumberOfControllersPerType, int, m_NumberOfCacheLevels+2, m_NumberOfCores, false);
-//PARAM_ARRAY2D( NumberOfControllersPerTypePerChip, int, m_NumberOfCacheLevels+2, m_NumberOfChips, m_NumberOfCores, false);
-
-// ********************************************
-// DMA CONTROLLER PARAMETERS
-// ********************************************
-
-//PARAM( NumberOfDMA, 1, false );
-//PARAM_ARRAY( NumberOfDMAPerChip, int, m_NumberOfChips, 1, false);
-//PARAM_ARRAY( ChipNumFromDMAVersion, int, m_NumberOfDMA, 0, false );
-
-//PARAM_ULONG( MemorySizeBytes, 4294967296, false );
-//PARAM_ULONG( MemorySizeBits, 32, false);
-
-//PARAM( NUM_PROCESSORS, 0, false );
-//PARAM( NUM_L2_BANKS, 0, false );
-//PARAM( NUM_MEMORIES, 0, false );
-//PARAM( ProcsPerChip, 1, false );
-
-// The following group of parameters are calculated.  They must
-// _always_ be left at zero.
-//PARAM( NUM_CHIPS, 0, false );
-//PARAM( NUM_CHIP_BITS, 0, false );
-//PARAM( MEMORY_SIZE_BITS, 0, false );
-//PARAM( DATA_BLOCK_BITS, 0, false );
-//PARAM( PAGE_SIZE_BITS, 0, false );
-//PARAM( NUM_PROCESSORS_BITS, 0, false );
-//PARAM( PROCS_PER_CHIP_BITS, 0, false );
-//PARAM( NUM_L2_BANKS_BITS, 0, false );
-//PARAM( NUM_L2_BANKS_PER_CHIP_BITS, 0, false );
-//PARAM( NUM_L2_BANKS_PER_CHIP, 0, false );
-//PARAM( NUM_MEMORIES_BITS, 0, false );
-//PARAM( NUM_MEMORIES_PER_CHIP, 0, false );
-//PARAM( MEMORY_MODULE_BITS, 0, false );
-//PARAM_ULONG( MEMORY_MODULE_BLOCKS, 0, false );
-
-// TIMING PARAMETERS
-//PARAM( DIRECTORY_CACHE_LATENCY, 6, false );
-
-//PARAM( NULL_LATENCY, 1, false );
-//PARAM( ISSUE_LATENCY, 2, false );
-//PARAM( CACHE_RESPONSE_LATENCY, 12, false );
-//PARAM( L2_RESPONSE_LATENCY, 6, false );
-//PARAM( L2_TAG_LATENCY, 6, false );
-//PARAM( L1_RESPONSE_LATENCY, 3, false );
-
-//PARAM( MEMORY_RESPONSE_LATENCY_MINUS_2, 158, false );
-//PARAM( DirectoryLatency, 6, false );
-
-//PARAM( NetworkLinkLatency, 1, false );
-//PARAM( COPY_HEAD_LATENCY, 4, false );
-//PARAM( OnChipLinkLatency, 1, false );
-//PARAM( RecycleLatency, 10, false );
-//PARAM( L2_RECYCLE_LATENCY, 5, false );
-//PARAM( TIMER_LATENCY, 10000, false );
-//PARAM( TBE_RESPONSE_LATENCY, 1, false );
-//PARAM_BOOL(  PERIODIC_TIMER_WAKEUPS, true, false );
-
-// constants used by CMP protocols
-//PARAM( L1_REQUEST_LATENCY, 2, false );
-//PARAM( L2_REQUEST_LATENCY, 4, false );
-//PARAM_BOOL( SINGLE_ACCESS_L2_BANKS, true, false ); // hack to simulate multi-cycle L2 bank accesses
-
-// Ruby cycles between when a sequencer issues a miss it arrives at
-// the L1 cache controller
-//PARAM( SequencerToControllerLatency, 4, false );
-
-// Number of transitions each controller state machines can complete per cycle
-//PARAM( L1CacheTransitionsPerCycle, 32, false );
-//PARAM( L2CACHE_TRANSITIONS_PER_RUBY_CYCLE, 32, false );
-//PARAM( DirectoryTransitionsPerCycle, 32, false );
-//PARAM( DMATransitionsPerCycle, 1, false );
-
-// Number of TBEs available for demand misses, prefetches, and replacements
-//PARAM( NumberOfTBEs, 128, false );
-//PARAM( NumberOfL1TBEs, 32, false );
-//PARAM( NumberOfL2TBEs, 32, false );
-
-// NOTE: Finite buffering allows us to simulate a wormhole routed network
-// with idealized flow control.  All message buffers within the network (i.e.
-// the switch's input and output buffers) are set to the size specified below
-// by the PROTOCOL_BUFFER_SIZE
-//PARAM_BOOL( FiniteBuffering, false, false );
-//PARAM( FiniteBufferSize, 3, false ); // Zero is unbounded buffers
-// Number of requests buffered between the sequencer and the L1 conroller
-// This can be more accurately simulated in Opal, therefore it's set to an
-// infinite number
-// Only effects the simualtion when FINITE_BUFFERING is enabled
-//PARAM( ProcessorBufferSize, 10, false );
-// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
-// Controllers.  Controlls the number of request issued by the L2 HW Prefetcher
-//PARAM( ProtocolBufferSize, 32, false );
-
-// NETWORK PARAMETERS
-
-// Network Topology: See TopologyType in external.sm for valid values
-//PARAM_STRING( NetworkTopology, "PT_TO_PT", false );
-
-// Cache Design specifies file prefix for topology
-//PARAM_STRING( CacheDesign, "NUCA", false );
-
-//PARAM( EndpointBandwidth, 10000, false );
-//PARAM_BOOL( AdaptiveRouting, true, false );
-//PARAM( NumberOfVirtualNetworks, 6, false );
-//PARAM( FanOutDegree, 4, false );
-//PARAM_BOOL( PrintTopology, true, false );
-
-// Princeton Network (Garnet)
-//PARAM_BOOL( UsingGarnetNetwork, true, false );
-//PARAM_BOOL( UsingDetailNetwork, false, false );
-//PARAM_BOOL( UsingNetworkTesting, false, false );
-//PARAM( FlitSize, 16, false );
-//PARAM( NumberOfPipeStages, 4, false );
-//PARAM( VCSPerClass, 4, false );
-//PARAM( BufferSize, 4, false );
-
-// MemoryControl:
-//PARAM( MEM_BUS_CYCLE_MULTIPLIER, 10, false );
-//PARAM( BANKS_PER_RANK, 8, false );
-//PARAM( RANKS_PER_DIMM, 2, false );
-//PARAM( DIMMS_PER_CHANNEL, 2, false );
-//PARAM( BANK_BIT_0, 8, false );
-//PARAM( RANK_BIT_0, 11, false );
-//PARAM( DIMM_BIT_0, 12, false );
-//PARAM( BANK_QUEUE_SIZE, 12, false );
-//PARAM( BankBusyTime, 11, false );
-//PARAM( RANK_RANK_DELAY, 1, false );
-//PARAM( READ_WRITE_DELAY, 2, false );
-//PARAM( BASIC_BUS_BUSY_TIME, 2, false );
-//PARAM( MEM_CTL_LATENCY, 12, false );
-//PARAM( REFRESH_PERIOD, 1560, false );
-//PARAM( TFAW, 0, false );
-//PARAM( MEM_RANDOM_ARBITRATE, 0, false );
-//PARAM( MEM_FIXED_DELAY, 0, false );
-
diff --git a/src/mem/ruby/config/defaults.rb b/src/mem/ruby/config/defaults.rb
index f338f4e3f..224bf1eeb 100644
--- a/src/mem/ruby/config/defaults.rb
+++ b/src/mem/ruby/config/defaults.rb
@@ -1,7 +1,5 @@
 #!/usr/bin/ruby
 
-
-
 class NetPort < LibRubyObject
   # number of transitions a SLICC state machine can transition per
   # cycle
@@ -9,9 +7,8 @@ class NetPort < LibRubyObject
 
   # buffer_size limits the size of all other buffers connecting to
   # SLICC Controllers.  When 0, infinite buffering is used.
-  default_param :buffer_size, Integer, 0
+  default_param :buffer_size, Integer, 32
 
-  # added by SS for TBE
   default_param :number_of_TBEs, Integer, 256
 
   default_param :recycle_latency, Integer, 10
@@ -38,16 +35,36 @@ class Debug < LibRubyObject
   #   3. set start_time = 1
   default_param :protocol_trace, Boolean, false
 
-  # a string for filtering debugging output (for all g_debug vars see Debug.h)
+  # a string for filtering debugging output. Valid options (also see Debug.cc):
+  #    {"System",            's' },
+  #    {"Node",              'N' },
+  #    {"Queue",             'q' },
+  #    {"Event Queue",       'e' },
+  #    {"Network",           'n' },
+  #    {"Sequencer",         'S' },
+  #    {"Tester",            't' },
+  #    {"Generated",         'g' },
+  #    {"SLICC",             'l' },
+  #    {"Network Queues",    'Q' },
+  #    {"Time",              'T' },
+  #    {"Network Internals", 'i' },
+  #    {"Store Buffer",      'b' },
+  #    {"Cache",             'c' },
+  #    {"Predictor",         'p' },
+  #    {"Allocator",         'a' }
+  #
+  #  e.g., "sq" will print system and queue debugging messages
+  #  Set to "none" for no debugging output
   default_param :filter_string, String, "none"
 
-  # filters debugging messages based on priority (low, med, high)
+  # filters debugging messages based on priority (none, low, med, high)
   default_param :verbosity_string, String, "none"
 
   # filters debugging messages based on a ruby time
   default_param :start_time, Integer, 1
   
   # sends debugging messages to a output filename
+  # set to "none" to print to stdout
   default_param :output_filename, String, "none"
 end
 
@@ -65,23 +82,23 @@ class Topology < LibRubyObject
 
   # indicates whether the topology config will be displayed in the
   # stats file
-  default_param :print_config, Boolean, true
+  default_param :print_config, Boolean, false
 end
 
 class Network < LibRubyObject
   default_param :endpoint_bandwidth, Integer, 10000
   default_param :adaptive_routing, Boolean, true
-  default_param :number_of_virtual_networks, Integer, 10
-  default_param :fan_out_degree, Integer, 4
+  default_param :number_of_virtual_networks, Integer, 5
+  #  default_param :fan_out_degree, Integer, 4
 
   # default buffer size.  Setting to 0 indicates infinite buffering
-  default_param :buffer_size, Integer, 0
+  #  default_param :buffer_size, Integer, 0
 
   # local memory latency ?? NetworkLinkLatency
   default_param :link_latency, Integer, 1
 
   # on chip latency
-  default_param :on_chip_latency, Integer, 1
+  #  default_param :on_chip_latency, Integer, 1
  
   default_param :control_msg_size, Integer, 8
 end
@@ -94,20 +111,15 @@ class GarnetNetwork < Network
   default_param :using_network_testing, Boolean, false
 end
 
-
-
-#added by SS
 class Tracer < LibRubyObject
   default_param :warmup_length, Integer, 1000000
 end
 
-#added by SS
 class Profiler < LibRubyObject
   default_param :hot_lines, Boolean, false
   default_param :all_instructions, Boolean, false
 end
 
-#added by SS
 class MemoryControl < LibRubyObject
 
   default_param :mem_bus_cycle_multiplier, Integer, 10
@@ -125,7 +137,7 @@ class MemoryControl < LibRubyObject
   default_param :mem_ctl_latency, Integer, 12
   default_param :refresh_period, Integer, 1560
   default_param :tFaw, Integer, 0
-  default_param :mem_random_arbitrate, Integer, 0
+  default_param :mem_random_arbitrate, Integer, 11
   default_param :mem_fixed_delay, Integer, 0
 
 end
@@ -163,49 +175,33 @@ class MOESI_CMP_directory_DirectoryController < DirectoryController
 end
 
 class MOESI_CMP_directory_DMAController < DMAController
-  default_param :request_latency, Integer, 6
-  default_param :response_latency, Integer, 6
+  default_param :request_latency, Integer, 14
+  default_param :response_latency, Integer, 14
 end
 
-## MOESI_CMP_token protocol
+class MESI_CMP_directory_L2CacheController < CacheController
+  default_param :request_latency, Integer, 2
+  default_param :response_latency, Integer, 2
+  default_param :to_L1_latency, Integer, 1
+
+  # when left at 0, lowest_bit and highest_bit are calculated automatically
+  default_param :lowest_bit, Integer, 0
+  default_param :highest_bit, Integer, 0    
+end
 
-class MOESI_CMP_token_L1CacheController < L1CacheController
+class MESI_CMP_directory_L1CacheController < L1CacheController
   default_param :l1_request_latency, Integer, 2
   default_param :l1_response_latency, Integer, 2
-  default_param :retry_threshold, Integer, 1
-  default_param :fixed_timeout_latency, Integer, 300
-  default_param :dynamic_timeout_enabled, Boolean, true
+  default_param :to_L2_latency, Integer, 1
 end
 
-class MOESI_CMP_token_L2CacheController < CacheController
-  default_param :l2_request_latency, Integer, 2
-  default_param :l2_response_latency, Integer, 2
-  default_param :filtering_enabled, Boolean, true
-end
 
-class MOESI_CMP_token_DirectoryController < DirectoryController
+class MESI_CMP_directory_DirectoryController < DirectoryController
+  default_param :to_mem_ctrl_latency, Integer, 1
   default_param :directory_latency, Integer, 6
-  default_param :distributed_persistent, Boolean, true
-  default_param :fixed_timeout_latency, Integer, 300
-end
-
-class MOESI_CMP_token_DMAController < DMAController
-  default_param :request_latency, Integer, 6
-  default_param :response_latency, Integer, 6
-end
-
-## MOESI_hammer protocol
-
-class MOESI_hammer_CacheController < L1CacheController
-  default_param :issue_latency, Integer, 2
-  default_param :cache_response_latency, Integer, 12
-end
-
-class MOESI_hammer_DirectoryController < DirectoryController
-  default_param :memory_controller_latency, Integer, 12
 end
 
-class MOESI_hammer_DMAController < DMAController
+class MESI_CMP_directory_DMAController < DMAController
   default_param :request_latency, Integer, 6
 end
 
@@ -219,8 +215,9 @@ class RubySystem
   # When set to true, the simulation will insert random delays on
   # message enqueue times.  Note that even if this is set to false,
   # you can still have a non-deterministic simulation if random seed
-  # is set to "rand".  This is because the Ruby swtiches use random
-  # link priority elevation
+  # is set to "rand".  Randomization is used mainly to debug protocols
+  # by forcing really strange interleavings and should not be used for
+  # performance runs.
   default_param :randomization, Boolean, false
 
   # tech_nm is the device size used to calculate latency and area
@@ -246,31 +243,6 @@ class RubySystem
 
   default_param :profiler, Profiler, Profiler.new("profiler0")
 end
-#added by SS
-
-class MESI_CMP_directory_L2CacheController < CacheController
-  default_param :l2_request_latency, Integer, 2
-  default_param :l2_response_latency, Integer, 2
-  default_param :to_L1_latency, Integer, 1
-
-#if 0 then automatically calculated
-  default_param :lowest_bit, Integer, 0
-  default_param :highest_bit, Integer, 0    
-end
-
-class MESI_CMP_directory_L1CacheController < L1CacheController
-  default_param :l1_request_latency, Integer, 2
-  default_param :l1_response_latency, Integer, 2
-  default_param :to_L2_latency, Integer, 1
-end
 
 
-class MESI_CMP_directory_DirectoryController < DirectoryController
-  default_param :to_mem_ctrl_latency, Integer, 1
-  default_param :directory_latency, Integer, 6
-end
-
-class MESI_CMP_directory_DMAController < DMAController
-  default_param :request_latency, Integer, 6
-end
 
diff --git a/src/mem/ruby/config/rubyconfig.defaults b/src/mem/ruby/config/rubyconfig.defaults
deleted file mode 100644
index 936a2f091..000000000
--- a/src/mem/ruby/config/rubyconfig.defaults
+++ /dev/null
@@ -1,405 +0,0 @@
-//
-// This file has been modified by Kevin Moore and Dan Nussbaum of the
-// Scalable Systems Research Group at Sun Microsystems Laboratories
-// (http://research.sun.com/scalable/) to support the Adaptive
-// Transactional Memory Test Platform (ATMTP).  For information about
-// ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/.
-//
-// Please send email to atmtp-interest@sun.com with feedback, questions, or
-// to request future announcements about ATMTP.
-//
-// ----------------------------------------------------------------------
-//
-// File modification date: 2008-02-23
-//
-// ----------------------------------------------------------------------
-//
-// ATMTP is distributed as part of the GEMS software toolset and is
-// available for use and modification under the terms of version 2 of the
-// GNU General Public License.  The GNU General Public License is contained
-// in the file $GEMS/LICENSE.
-//
-// Multifacet GEMS is free software; you can redistribute it and/or modify
-// it under the terms of version 2 of the GNU General Public License as
-// published by the Free Software Foundation.
-//
-// Multifacet GEMS is distributed in the hope that it will be useful, but
-// WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with the Multifacet GEMS; if not, write to the Free Software Foundation,
-// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
-//
-// ----------------------------------------------------------------------
-//
-
-g_RANDOM_SEED: 1
-
-g_DEADLOCK_THRESHOLD: 500000
-
-// determines how many Simics cycles advance for every Ruby cycle
-//  (does not apply when running Opal)
-SIMICS_RUBY_MULTIPLIER: 4
-
-// Ruby cycles between when a sequencer issues a request and it arrives at
-// the L1 cache controller
-//
-// ** important ** this parameter determines the L2 hit latency when
-//  using the SMP protocols with a combined L1/L2 controller (-cache.sm)
-//
-SEQUENCER_TO_CONTROLLER_LATENCY: 4
-
-
-// When set to false, the L1 cache structures are probed for a hit in Sequencer.C
-//  If a request hits, it is *not* issued to the cache controller
-// When set to true, all processor data requests issue to cache controller
-//
-// ** important ** this parameter must be set to false for proper L1/L2 hit timing
-//  for the SMP protocols with combined L1/L2 controllers (-cache.sm)
-//
-REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: false
-
-
-// When running with Opal in SMT configurations, this indicates the number of threads per physical processor
-g_NUM_SMT_THREADS: 1
-
-
-// Maximum number of requests (including SW prefetches) outstanding from
-// the sequencer (Note: this also include items buffered in the store
-// buffer)
-g_SEQUENCER_OUTSTANDING_REQUESTS: 16
-
-
-PROTOCOL_DEBUG_TRACE: true
-DEBUG_FILTER_STRING: none
-DEBUG_VERBOSITY_STRING: none
-DEBUG_START_TIME: 0
-DEBUG_OUTPUT_FILENAME: none
-
-
-TRANSACTION_TRACE_ENABLED: false
-USER_MODE_DATA_ONLY: false
-PROFILE_HOT_LINES: false
-
-PROFILE_ALL_INSTRUCTIONS: false
-PRINT_INSTRUCTION_TRACE: false
-g_DEBUG_CYCLE: 0
-BLOCK_STC: false
-PERFECT_MEMORY_SYSTEM: false
-PERFECT_MEMORY_SYSTEM_LATENCY: 0
-DATA_BLOCK: false
-
-
-// *********************************************
-// CACHE & MEMORY PARAMETERS
-// *********************************************
-
-
-L1_CACHE_ASSOC: 4
-L1_CACHE_NUM_SETS_BITS: 8
-L2_CACHE_ASSOC: 4
-L2_CACHE_NUM_SETS_BITS: 16
-
-// 32 bits = 4 GB address space
-g_MEMORY_SIZE_BYTES: 1073741824 //4294967296
-g_DATA_BLOCK_BYTES: 64
-g_PAGE_SIZE_BYTES: 4096
-g_REPLACEMENT_POLICY: PSEDUO_LRU // currently, only other option is LRU
-
-g_PROCS_PER_CHIP: 1
-
-
-// set automatically
-g_NUM_PROCESSORS: 0
-g_NUM_L2_BANKS: 0
-g_NUM_MEMORIES: 0
-
-// The following group of parameters are calculated.  They must
-// _always_ be left at zero.
-g_NUM_CHIPS: 0
-g_NUM_CHIP_BITS: 0
-g_MEMORY_SIZE_BITS: 0
-g_DATA_BLOCK_BITS: 0
-g_PAGE_SIZE_BITS: 0
-g_NUM_PROCESSORS_BITS: 0
-g_PROCS_PER_CHIP_BITS: 0
-g_NUM_L2_BANKS_BITS: 0
-g_NUM_L2_BANKS_PER_CHIP: 0
-g_NUM_L2_BANKS_PER_CHIP_BITS: 0
-g_NUM_MEMORIES_BITS: 0
-g_NUM_MEMORIES_PER_CHIP: 0
-g_MEMORY_MODULE_BITS: 0
-g_MEMORY_MODULE_BLOCKS: 0
-
-
-// For certain CMP protocols, determines whether the lowest bits of a block address
-// are used to index to a L2 cache bank or into the sets of a
-// single bank
-//        lowest                                                             highest
-// true:   g_DATA_BLOCK_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS | L2_CACHE_NUM_SETS_BITS
-// false:  g_DATA_BLOCK_BITS | L2_CACHE_NUM_SETS_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS
-MAP_L2BANKS_TO_LOWEST_BITS: false
-
-
-
-// TIMING PARAMETERS  -- many of these are protocol specific.  See SLICC files
-//                       to determine where they apply
-
-MEMORY_RESPONSE_LATENCY_MINUS_2: 158  // determines memory response latency
-DIRECTORY_CACHE_LATENCY: 6
-NULL_LATENCY: 1
-ISSUE_LATENCY: 2
-CACHE_RESPONSE_LATENCY: 12
-L1_RESPONSE_LATENCY: 3
-L2_RESPONSE_LATENCY: 6
-L2_TAG_LATENCY: 6
-DIRECTORY_LATENCY: 80
-NETWORK_LINK_LATENCY: 1
-COPY_HEAD_LATENCY: 4
-ON_CHIP_LINK_LATENCY: 1
-RECYCLE_LATENCY: 10
-L2_RECYCLE_LATENCY: 5
-TIMER_LATENCY: 10000
-TBE_RESPONSE_LATENCY: 1
-PERIODIC_TIMER_WAKEUPS: true
-
-
-// constants used by CMP protocols
-// cache bank access times
-L1_REQUEST_LATENCY: 2
-L2_REQUEST_LATENCY: 4
-
-
-// Number of transitions each controller state machines can complete per cycle
-// i.e. the number of ports to each controller
-// L1cache is the sum of the L1I and L1D cache ports
-L1CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
-// Note: if SINGLE_ACCESS_L2_BANKS is enabled, this will probably enforce a
-// much greater constraint on the concurrency of a L2 cache bank
-L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
-DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 32
-DMA_TRANSITIONS_PER_RUBY_CYCLE: 1
-
-
-// Number of TBEs available for demand misses, ALL prefetches, and replacements
-// used by one-level protocols
-NUMBER_OF_TBES: 128
-// two-level protocols
-NUMBER_OF_L1_TBES: 32
-NUMBER_OF_L2_TBES: 32
-
-// ** INTERCONECT PARAMETERS **
-//
-g_PRINT_TOPOLOGY: true
-g_NETWORK_TOPOLOGY: HIERARCHICAL_SWITCH
-g_CACHE_DESIGN: NUCA  // specifies file prefix for FILE_SPECIFIED topology
-FAN_OUT_DEGREE: 4  // for HIERARCHICAL SWITCH topology
-
-g_adaptive_routing: true
-NUMBER_OF_VIRTUAL_NETWORKS: 6
-
-// bandwidth unit is 1/1000 byte per cycle.  the following parameter is multiplied by
-//  topology specific link weights
-g_endpoint_bandwidth: 10000
-
-
-// ** finite buffering parameters
-//
-// note: Finite buffering allows us to simulate a realistic virtual cut-through
-// routed network with idealized flow control.  this feature is NOT heavily tested
-FINITE_BUFFERING: false
-// All message buffers within the network (i.e. the switch's input and
-// output buffers) are set to the size specified below by the FINITE_BUFFER_SIZE
-FINITE_BUFFER_SIZE: 3
-// g_SEQUENCER_OUTSTANDING_REQUESTS (above) controlls the number of demand requests
-// issued by the sequencer.  The PROCESSOR_BUFFER_SIZE controlls the
-// number of requests in the mandatory queue
-// Only effects the simualtion when FINITE_BUFFERING is enabled
-PROCESSOR_BUFFER_SIZE: 10
-// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
-// Controllers.  Controlls the number of request issued by the L2 HW Prefetcher
-PROTOCOL_BUFFER_SIZE: 32
-// ** end finite buffering parameters
-
-
-// (deprecated)
-// Allows on a single accesses to a multi-cycle L2 bank.
-// Ensures the cache array is only accessed once for every L2_REQUEST_LATENCY
-// number of cycles.  However the TBE table can be accessed in parallel.
-SINGLE_ACCESS_L2_BANKS: true
-
-
-// MOESI_CMP_token parameters (some might be deprecated)
-g_FILTERING_ENABLED: false
-g_DISTRIBUTED_PERSISTENT_ENABLED: true
-g_RETRY_THRESHOLD: 1
-g_DYNAMIC_TIMEOUT_ENABLED: true
-g_FIXED_TIMEOUT_LATENCY: 300
-
-
-// tester parameters (overridden by testerconfig.defaults)
-//
-//  injects random message delays to excite protocol races
-RANDOMIZATION: false
-g_SYNTHETIC_DRIVER: false
-g_DETERMINISTIC_DRIVER: false
-g_trace_warmup_length: 1000000
-g_bash_bandwidth_adaptive_threshold: 0.75
-
-g_tester_length: 0
-// # of synthetic locks == 16 * 128
-g_synthetic_locks: 2048
-g_deterministic_addrs: 1
-g_SpecifiedGenerator: DetermInvGenerator
-g_callback_counter: 0
-g_NUM_COMPLETIONS_BEFORE_PASS: 0
-// parameters used by locking synthetic tester
-g_think_time: 5
-g_hold_time:  5
-g_wait_time:  5
-
-// Princeton Network (Garnet)
-g_GARNET_NETWORK: true
-g_DETAIL_NETWORK: false
-g_NETWORK_TESTING: false
-g_FLIT_SIZE: 16
-g_NUM_PIPE_STAGES: 4
-g_VCS_PER_CLASS: 4
-g_BUFFER_SIZE: 4
-
-///////////////////////////////////////////////////////////////////////////////
-//
-// MemoryControl:
-
-// Basic cycle time of the memory controller.  This defines the period which is
-// used as the memory channel clock period, the address bus bit time, and the
-// memory controller cycle time.
-// Assuming a 200 MHz memory channel (DDR-400, which has 400 bits/sec data),
-// and a 2 GHz Ruby clock:
-MEM_BUS_CYCLE_MULTIPLIER: 10
-
-// How many internal banks in each DRAM chip:
-BANKS_PER_RANK: 8
-
-// How many sets of DRAM chips per DIMM.
-RANKS_PER_DIMM: 2
-
-// How many DIMMs per channel.  (Currently the only thing that
-// matters is the number of ranks per channel, i.e. the product
-// of this parameter and RANKS_PER_DIMM.  But if and when this is
-// expanded to do FB-DIMMs, the distinction between the two
-// will matter.)
-DIMMS_PER_CHANNEL: 2
-
-// Which bits to use to find the bank, rank, and DIMM numbers.
-// You could choose to have the bank bits, rank bits, and DIMM bits
-// in any order; here they are in that order.
-// For these defaults, we assume this format for addresses:
-//    Offset within line:     [5:0]
-//    Memory controller #:    [7:6]
-//    Bank:                  [10:8]
-//    Rank:                    [11]
-//    DIMM:                    [12]
-//    Row addr / Col addr: [top:13]
-// If you get these bits wrong, then some banks won't see any
-// requests; you need to check for this in the .stats output.
-BANK_BIT_0: 8
-RANK_BIT_0: 11
-DIMM_BIT_0: 12
-
-// Number of entries max in each bank queues; set to whatever you want.
-// If it is too small, you will see in the .stats file a lot of delay
-// time spent in the common input queue.
-BANK_QUEUE_SIZE: 12
-
-// Bank cycle time (tRC) measured in memory cycles:
-BANK_BUSY_TIME: 11
-
-// This is how many memory address cycles to delay between reads to
-// different ranks of DRAMs to allow for clock skew:
-RANK_RANK_DELAY: 1
-
-// This is how many memory address cycles to delay between a read
-// and a write.  This is based on two things:  (1) the data bus is
-// used one cycle earlier in the operation; (2) a round-trip wire
-// delay from the controller to the DIMM that did the reading.
-READ_WRITE_DELAY: 2
-
-// Basic address and data bus occupancy.  If you are assuming a
-// 16-byte-wide data bus (pairs of DIMMs side-by-side), then
-// the data bus occupancy matches the address bus occupancy at
-// two cycles.  But if the channel is only 8 bytes wide, you
-// need to increase this bus occupancy time to 4 cycles.
-BASIC_BUS_BUSY_TIME: 2
-
-// Latency to returning read request or writeback acknowledgement.
-// Measured in memory address cycles.
-// This equals tRCD + CL + AL + (four bit times)
-//                            + (round trip on channel)
-//                            + (memory control internal delays)
-// It's going to be an approximation, so pick what you like.
-// Note:  The fact that latency is a constant, and does not depend on two
-// low-order address bits, implies that our memory controller either:
-// (a) tells the DRAM to read the critical word first, and sends the
-// critical word first back to the CPU, or (b) waits until it has
-// seen all four bit times on the data wires before sending anything
-// back.  Either is plausible.  If (a), remove the "four bit times"
-// term from the calculation above.
-MEM_CTL_LATENCY: 12
-
-// refresh_period is the number of memory cycles between refresh
-// of row x in bank n and refresh of row x+1 in bank n.  For DDR-400,
-// this is typically 7.8 usec for commercial systems; after 8192 such
-// refreshes, this will have refreshed the whole chip in 64 msec.  If
-// we have a 5 nsec memory clock, 7800 / 5 = 1560 cycles.  The memory
-// controller will divide this by the total number of banks, and kick
-// off a refresh to *somebody* every time that amount is counted
-// down to zero. (There will be some rounding error there, but it
-// should have minimal effect.)
-REFRESH_PERIOD: 1560
-
-// tFAW is a DRAM chip parameter which restricts the number of
-// activates that can be done within a certain window of time.
-// The window is specified here in terms of number of memory
-// controller cycles.  At most four activates may be done during
-// any such sliding window.  If this number is set to be no more
-// than 4 * BASIC_BUS_BUSY_TIME, it will have no effect.
-// It is typical in real systems for tFAW to have no effect, but
-// it may be useful in throttling power.  Set to zero to ignore.
-TFAW: 0
-
-// By default, the memory controller uses round-robin to arbitrate
-// between ready bank queues for use of the address bus.  If you
-// wish to add randomness to the system, set this parameter to
-// one instead, and it will restart the round-robin pointer at a
-// random bank number each cycle.  If you want additional
-// nondeterminism, set the parameter to some integer n >= 2, and
-// it will in addition add a n% chance each cycle that a ready bank
-// will be delayed an additional cycle.  Note that if you are
-// in MEM_FIXED_DELAY mode (see below), MEM_RANDOM_ARBITRATE=1 will
-// have no effect, but MEM_RANDOM_ARBITRATE=2 or more will.
-MEM_RANDOM_ARBITRATE: 0
-
-// The following parameter, if nonzero, will disable the memory
-// controller and instead give every request a fixed latency.  The
-// nonzero value specified here is measured in memory cycles and is
-// just added to MEM_CTL_LATENCY.  It will also show up in the stats
-// file as a contributor to memory_delays_stalled_at_head_of_bank_queue.
-MEM_FIXED_DELAY: 0
-
-// If instead of DDR-400, you wanted DDR-800, the channel gets faster
-// but the basic operation of the DRAM core is unchanged.
-// Busy times appear to double just because they are measured
-// in smaller clock cycles.  The performance advantage comes because
-// the bus busy times don't actually quite double.
-// You would use something like these values:
-//
-// MEM_BUS_CYCLE_MULTIPLIER: 5
-// BANK_BUSY_TIME: 22
-// RANK_RANK_DELAY: 2
-// READ_WRITE_DELAY: 3
-// BASIC_BUS_BUSY_TIME: 3
-// MEM_CTL_LATENCY: 20
-// REFRESH_PERIOD: 3120
diff --git a/src/mem/ruby/config/tester.defaults b/src/mem/ruby/config/tester.defaults
deleted file mode 100644
index b30d1ba99..000000000
--- a/src/mem/ruby/config/tester.defaults
+++ /dev/null
@@ -1,50 +0,0 @@
-
-//
-// This file contains tester specific changes to the rubyconfig.defaults
-// parameter values.
-//
-// Please: - Add new variables only to rubyconfig.defaults file.
-//         - Change them here only when necessary.
-
-g_SIMICS: false
-DATA_BLOCK: true
-RANDOMIZATION: true
-g_SYNTHETIC_DRIVER: false
-g_DETERMINISTIC_DRIVER: true
-g_DEADLOCK_THRESHOLD: 500000
-g_SpecifiedGenerator: DetermGETXGenerator
-
-PROTOCOL_DEBUG_TRACE: true
-
-//
-// Generic cache parameters
-//
-
-// Cache sizes are smaller for the random tester to increase the amount
-// of false sharing.
-L1_CACHE_ASSOC: 2
-L1_CACHE_NUM_SETS_BITS: 2
-L2_CACHE_ASSOC: 2
-L2_CACHE_NUM_SETS_BITS: 5
-
-g_MEMORY_SIZE_BYTES: 1048576
-
-//g_NETWORK_TOPOLOGY: FILE_SPECIFIED
-RECYCLE_LATENCY: 1
-//NUMBER_OF_VIRTUAL_NETWORKS: 5
-//g_NUM_MEMORIES: 16
-L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 1000
-DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 1000
-//g_PROCS_PER_CHIP: 2
-//g_NUM_L2_BANKS: 16
-//g_endpoint_bandwidth: 10000
-//g_NUM_PROCESSORS: 16
-//g_NUM_SMT_THREADS: 1
-//g_GARNET_NETWORK: true
-//g_DETAIL_NETWORK: true
-//g_NETWORK_TESTING: false
-//g_FLIT_SIZE: 32
-//g_NUM_PIPE_STAGES: 5
-//g_VCS_PER_CLASS: 2
-//g_BUFFER_SIZE: 4
-
diff --git a/src/mem/ruby/libruby.hh b/src/mem/ruby/libruby.hh
index 29aac232a..4c50611c1 100644
--- a/src/mem/ruby/libruby.hh
+++ b/src/mem/ruby/libruby.hh
@@ -34,7 +34,7 @@ struct RubyRequest {
   unsigned proc_id;
 
   RubyRequest() {}
-  RubyRequest(uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, unsigned _proc_id = 0)
+  RubyRequest(uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, unsigned _proc_id = 100)
     : paddr(_paddr), data(_data), len(_len), pc(_pc), type(_type), access_mode(_access_mode), proc_id(_proc_id)
   {}
 };
@@ -71,6 +71,12 @@ RubyPortHandle libruby_get_port(const char* name, void (*hit_callback)(int64_t a
 RubyPortHandle libruby_get_port_by_name(const char* name);
 
 
+/** 
+ * libruby_issue_request error return codes 
+ */
+#define LIBRUBY_BUFFER_FULL -2
+#define LIBRUBY_ALIASED_REQUEST -3
+
 /**
  * issue_request returns a unique access_id to identify the ruby
  * transaction. This access_id is later returned to the caller via
diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc
index 02fc8db2a..467e1bf87 100644
--- a/src/mem/ruby/network/simple/PerfectSwitch.cc
+++ b/src/mem/ruby/network/simple/PerfectSwitch.cc
@@ -184,7 +184,7 @@ void PerfectSwitch::wakeup()
 
         assert(m_link_order.size() == m_routing_table.size());
         assert(m_link_order.size() == m_out.size());
-//changed by SS
+
         if (m_network_ptr->getAdaptiveRouting()) {
           if (m_network_ptr->isVNetOrdered(vnet)) {
             // Don't adaptively route
diff --git a/src/mem/ruby/network/simple/Topology.cc b/src/mem/ruby/network/simple/Topology.cc
index dedf79d58..563a1b01c 100644
--- a/src/mem/ruby/network/simple/Topology.cc
+++ b/src/mem/ruby/network/simple/Topology.cc
@@ -79,7 +79,6 @@ void Topology::init(const vector<string> & argv)
       m_connections = argv[i+1];
     else if (argv[i] == "print_config") {
       m_print_config = string_to_bool(argv[i+1]);
-      cerr << "print config: " << m_print_config << endl;
     }
   }
   assert(m_network_ptr != NULL);
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index 7da3d317a..1d1c56aba 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -22,8 +22,8 @@ public:
   virtual const string getName() const = 0;   // return instance name
   virtual const MachineType getMachineType() const = 0;
   virtual void set_atomic(Address addr) = 0;
-  virtual void started_writes() = 0;
-  virtual void clear_atomic() = 0;
+  virtual void clear_atomic(Address addr) = 0;
+  virtual void reset_atomics() = 0;
 
   virtual void print(ostream & out) const = 0;
   virtual void printStats(ostream & out) const = 0;
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
index 222ff86f8..69424c414 100644
--- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
+++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
@@ -94,6 +94,17 @@ MachineID map_Address_to_DMA(const Address & addr)
   return dma;
 }
 
+inline 
+NetDest broadcast(MachineType type)  // returns a NetDest holding every machine instance of the given type
+{
+  NetDest dest;
+  for (int i=0; i<MachineType_base_count(type); i++) {  // i ranges over [0, MachineType_base_count(type))
+    MachineID mach = {type, i};  // aggregate-initialized from (type, i)
+    dest.add(mach);
+  }
+  return dest;  // returned by value
+}
+
 inline
 MachineID mapAddressToRange(const Address & addr, MachineType type, int low_bit, int num_bits)
 {
diff --git a/src/mem/ruby/system/CacheMemory.cc b/src/mem/ruby/system/CacheMemory.cc
index 630b94542..cf3e094ad 100644
--- a/src/mem/ruby/system/CacheMemory.cc
+++ b/src/mem/ruby/system/CacheMemory.cc
@@ -83,10 +83,8 @@ void CacheMemory::init(const vector<string> & argv)
     }
   }
 
-  assert(cache_size != -1);
-  
-  m_cache_num_sets = (cache_size / m_cache_assoc) / RubySystem::getBlockSizeBytes();
-  assert(m_cache_num_sets > 1);
+  int num_lines = cache_size/RubySystem::getBlockSizeBytes();
+  m_cache_num_sets = num_lines / m_cache_assoc;
   m_cache_num_set_bits = log_int(m_cache_num_sets);
   assert(m_cache_num_set_bits > 0);
 
@@ -122,7 +120,7 @@ CacheMemory::~CacheMemory()
 }
 
 int
-CacheMemory::numberOfLastLevelCaches() 
+CacheMemory::numberOfLastLevelCaches()
 { 
   return m_num_last_level_caches; 
 }
@@ -165,13 +163,10 @@ int CacheMemory::findTagInSet(Index cacheSet, const Address& tag) const
 {
   assert(tag == line_address(tag));
   // search the set for the tags
-  for (int i=0; i < m_cache_assoc; i++) {
-    if ((m_cache[cacheSet][i] != NULL) &&
-        (m_cache[cacheSet][i]->m_Address == tag) &&
-        (m_cache[cacheSet][i]->m_Permission != AccessPermission_NotPresent)) {
-      return i;
-    }
-  }
+  m5::hash_map<Address, int>::const_iterator it = m_tag_index.find(tag);
+  if (it != m_tag_index.end())
+    if (m_cache[cacheSet][it->second]->m_Permission != AccessPermission_NotPresent)
+      return it->second;
   return -1; // Not found
 }
 
@@ -181,10 +176,9 @@ int CacheMemory::findTagInSetIgnorePermissions(Index cacheSet, const Address& ta
 {
   assert(tag == line_address(tag));
   // search the set for the tags
-  for (int i=0; i < m_cache_assoc; i++) {
-    if (m_cache[cacheSet][i] != NULL && m_cache[cacheSet][i]->m_Address == tag)
-      return i;
-  }
+  m5::hash_map<Address, int>::const_iterator it = m_tag_index.find(tag);
+  if (it != m_tag_index.end())
+    return it->second;
   return -1; // Not found
 }
 
@@ -291,6 +285,7 @@ void CacheMemory::allocate(const Address& address, AbstractCacheEntry* entry)
       m_cache[cacheSet][i]->m_Address = address;
       m_cache[cacheSet][i]->m_Permission = AccessPermission_Invalid;
       m_locked[cacheSet][i] = -1;
+      m_tag_index[address] = i;
 
       m_replacementPolicy_ptr->touch(cacheSet, i, g_eventQueue_ptr->getTime());
 
@@ -311,6 +306,7 @@ void CacheMemory::deallocate(const Address& address)
     delete m_cache[cacheSet][location];
     m_cache[cacheSet][location] = NULL;
     m_locked[cacheSet][location] = -1;
+    m_tag_index.erase(address);
   }
 }
 
diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh
index 856b7bcac..8b84f33ec 100644
--- a/src/mem/ruby/system/CacheMemory.hh
+++ b/src/mem/ruby/system/CacheMemory.hh
@@ -54,6 +54,7 @@
 #include "mem/ruby/slicc_interface/AbstractController.hh"
 #include "mem/ruby/profiler/CacheProfiler.hh"
 #include "mem/protocol/CacheMsg.hh"
+#include "base/hashmap.hh"
 #include <vector>
 
 class CacheMemory {
@@ -70,8 +71,6 @@ public:
   //  static CacheMemory* createCache(int level, int num, char split_type, AbstractCacheEntry* (*entry_factory)());
   //  static CacheMemory* getCache(int cache_id);
 
-  static int numberOfLastLevelCaches();
-  
   // Public Methods
   void printConfig(ostream& out);
 
@@ -106,6 +105,8 @@ public:
   AccessPermission getPermission(const Address& address) const;
   void changePermission(const Address& address, AccessPermission new_perm);
 
+  static int numberOfLastLevelCaches();
+
   int getLatency() const { return m_latency; }
 
   // Hook for checkpointing the contents of the cache
@@ -158,6 +159,7 @@ private:
 
   // The first index is the # of cache lines.
   // The second index is the the amount associativity.
+  m5::hash_map<Address, int> m_tag_index;
   Vector<Vector<AbstractCacheEntry*> > m_cache;
   Vector<Vector<int> > m_locked;
 
@@ -169,9 +171,11 @@ private:
   int m_cache_num_set_bits;
   int m_cache_assoc;
 
+  static Vector< CacheMemory* > m_all_caches;
+  
   static int m_num_last_level_caches;
   static MachineType m_last_level_machine_type;
-  static Vector< CacheMemory* > m_all_caches;
+
 };
 
 #endif //CACHEMEMORY_H
diff --git a/src/mem/ruby/system/DMASequencer.hh b/src/mem/ruby/system/DMASequencer.hh
index 1f60b95ec..77c0a2258 100644
--- a/src/mem/ruby/system/DMASequencer.hh
+++ b/src/mem/ruby/system/DMASequencer.hh
@@ -25,6 +25,7 @@ public:
   void init(const vector<string> & argv);
   /* external interface */
   int64_t makeRequest(const RubyRequest & request);
+  bool isReady(const RubyRequest & request, bool dont_set = false) { assert(0); return false;};
   //  void issueRequest(uint64_t paddr, uint8* data, int len, bool rw);
   bool busy() { return m_is_busy;}
 
diff --git a/src/mem/ruby/system/DirectoryMemory.cc b/src/mem/ruby/system/DirectoryMemory.cc
index e230059ad..9b2a3873c 100644
--- a/src/mem/ruby/system/DirectoryMemory.cc
+++ b/src/mem/ruby/system/DirectoryMemory.cc
@@ -44,7 +44,7 @@
 
 int DirectoryMemory::m_num_directories = 0;
 int DirectoryMemory::m_num_directories_bits = 0;
-int DirectoryMemory::m_total_size_bytes = 0;
+uint64_t DirectoryMemory::m_total_size_bytes = 0;
 
 DirectoryMemory::DirectoryMemory(const string & name)
  : m_name(name)
diff --git a/src/mem/ruby/system/DirectoryMemory.hh b/src/mem/ruby/system/DirectoryMemory.hh
index 39de679ed..09211fd83 100644
--- a/src/mem/ruby/system/DirectoryMemory.hh
+++ b/src/mem/ruby/system/DirectoryMemory.hh
@@ -91,7 +91,7 @@ private:
 
   static int m_num_directories;
   static int m_num_directories_bits;
-  static int m_total_size_bytes;
+  static uint64_t m_total_size_bytes;
 
   MemoryVector* m_ram;
 };
diff --git a/src/mem/ruby/system/MemoryVector.hh b/src/mem/ruby/system/MemoryVector.hh
index c5f3cea7f..775244840 100644
--- a/src/mem/ruby/system/MemoryVector.hh
+++ b/src/mem/ruby/system/MemoryVector.hh
@@ -21,61 +21,105 @@ class MemoryVector {
   void write(const Address & paddr, uint8* data, int len);
   uint8* read(const Address & paddr, uint8* data, int len);
 
- private:
-  uint8* getBlockPtr(const Address & paddr);
+private:
+  uint8* getBlockPtr(const PhysAddress & addr);
 
   uint32 m_size;
-  uint8* m_vec;
+  uint8** m_pages;
+  uint32 m_num_pages;
+  const uint32 m_page_offset_mask;
 };
 
 inline
 MemoryVector::MemoryVector()
+  : m_page_offset_mask(4095)
 {
   m_size = 0;
-  m_vec = NULL;
+  m_num_pages = 0;
+  m_pages = NULL;
 }
 
 inline
 MemoryVector::MemoryVector(uint32 size)
+  : m_pages(NULL), m_num_pages(0), m_page_offset_mask(4095)
 {
-  m_size = size;
-  m_vec = new uint8[size];
+  setSize(size);  // setSize() frees any existing m_pages first, so they must start out NULL
 }
 
 inline
 MemoryVector::~MemoryVector()
 {
-  delete [] m_vec;
+  for (int i=0; i<m_num_pages; i++) {
+    if (m_pages[i] != 0) {
+      delete [] m_pages[i];
+    }
+  }
+  delete [] m_pages;
 }
 
 inline
 void MemoryVector::setSize(uint32 size)
 {
+  if (m_pages != NULL){
+    for (int i=0; i<m_num_pages; i++) {
+      if (m_pages[i] != 0) {
+        delete [] m_pages[i];
+      }
+    }
+    delete [] m_pages;
+  }
   m_size = size;
-  if (m_vec != NULL)
-    delete [] m_vec;
-  m_vec = new uint8[size];
+  assert(size%4096 == 0);
+  m_num_pages = size >> 12;
+  m_pages = new uint8*[m_num_pages];
+  memset(m_pages, 0, m_num_pages * sizeof(uint8*));
 }
 
 inline
 void MemoryVector::write(const Address & paddr, uint8* data, int len)
 {
   assert(paddr.getAddress() + len <= m_size);
-  memcpy(m_vec + paddr.getAddress(), data, len);
+  uint32 page_num = paddr.getAddress() >> 12;
+  if (m_pages[page_num] == 0) {
+    bool all_zeros = true;
+    for (int i=0;i<len;i++) {
+      if (data[i] != 0) {
+        all_zeros = false;
+        break;
+      }
+    }
+    if (all_zeros) return;
+    m_pages[page_num] = new uint8[4096];
+    memset(m_pages[page_num], 0, 4096);
+    uint32 offset = paddr.getAddress() & m_page_offset_mask;
+    memcpy(&m_pages[page_num][offset], data, len);
+  } else {
+    memcpy(&m_pages[page_num][paddr.getAddress()&m_page_offset_mask], data, len);
+  }
 }
 
 inline
 uint8* MemoryVector::read(const Address & paddr, uint8* data, int len)
 {
   assert(paddr.getAddress() + len <= m_size);
-  memcpy(data, m_vec + paddr.getAddress(), len);
+  uint32 page_num = paddr.getAddress() >> 12;
+  if (m_pages[page_num] == 0) {
+    memset(data, 0, len);
+  } else {
+    memcpy(data, &m_pages[page_num][paddr.getAddress()&m_page_offset_mask], len);
+  }
   return data;
 }
 
 inline
-uint8* MemoryVector::getBlockPtr(const Address & paddr)
+uint8* MemoryVector::getBlockPtr(const PhysAddress & paddr)
 {
-  return m_vec + paddr.getAddress();
+  uint32 page_num = paddr.getAddress() >> 12;
+  if (m_pages[page_num] == 0) {
+    m_pages[page_num] = new uint8[4096];
+    memset(m_pages[page_num], 0, 4096);
+  }
+  return &m_pages[page_num][paddr.getAddress()&m_page_offset_mask];
 }
 
 #endif // MEMORYVECTOR_H
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index c693e0f37..a8b4b075a 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -27,6 +27,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "mem/ruby/libruby.hh"
 #include "mem/ruby/common/Global.hh"
 #include "mem/ruby/system/Sequencer.hh"
 #include "mem/ruby/system/System.hh"
@@ -44,14 +45,14 @@
 //Sequencer::Sequencer(int core_id, MessageBuffer* mandatory_q)
 
 #define LLSC_FAIL -2
-ostream& operator<<(std::ostream& out, const SequencerRequest& obj) {
-  out << obj.ruby_request << flush;
-  return out;
-}
-
+long int already = 0;
 Sequencer::Sequencer(const string & name)
   :RubyPort(name)
 {
+  m_store_waiting_on_load_cycles = 0;
+  m_store_waiting_on_store_cycles = 0;
+  m_load_waiting_on_store_cycles = 0;
+  m_load_waiting_on_load_cycles = 0;
 }
 
 void Sequencer::init(const vector<string> & argv)
@@ -65,8 +66,8 @@ void Sequencer::init(const vector<string> & argv)
   m_instCache_ptr = NULL;
   m_dataCache_ptr = NULL;
   m_controller = NULL;
-  m_servicing_atomic = -1;
-  m_atomics_counter = 0;
+  m_atomic_reads = 0;
+  m_atomic_writes = 0;
   for (size_t i=0; i<argv.size(); i+=2) {
     if ( argv[i] == "controller") {
       m_controller = RubySystem::getController(argv[i+1]); // args[i] = "L1Cache"
@@ -110,8 +111,9 @@ void Sequencer::wakeup() {
     SequencerRequest* request = m_readRequestTable.lookup(keys[i]);
     if (current_time - request->issue_time >= m_deadlock_threshold) {
       WARN_MSG("Possible Deadlock detected");
-      WARN_EXPR(request->ruby_request);
+      WARN_EXPR(request);
       WARN_EXPR(m_version);
+      WARN_EXPR(request->ruby_request.paddr);
       WARN_EXPR(keys.size());
       WARN_EXPR(current_time);
       WARN_EXPR(request->issue_time);
@@ -125,7 +127,7 @@ void Sequencer::wakeup() {
     SequencerRequest* request = m_writeRequestTable.lookup(keys[i]);
     if (current_time - request->issue_time >= m_deadlock_threshold) {
       WARN_MSG("Possible Deadlock detected");
-      WARN_EXPR(request->ruby_request);
+      WARN_EXPR(request);
       WARN_EXPR(m_version);
       WARN_EXPR(current_time);
       WARN_EXPR(request->issue_time);
@@ -145,6 +147,14 @@ void Sequencer::wakeup() {
   }
 }
 
+void Sequencer::printStats(ostream & out) const {
+  out << "Sequencer: " << m_name << endl;
+  out << "  store_waiting_on_load_cycles: " << m_store_waiting_on_load_cycles << endl;
+  out << "  store_waiting_on_store_cycles: " << m_store_waiting_on_store_cycles << endl;
+  out << "  load_waiting_on_load_cycles: " << m_load_waiting_on_load_cycles << endl;
+  out << "  load_waiting_on_store_cycles: " << m_load_waiting_on_store_cycles << endl;
+}
+
 void Sequencer::printProgress(ostream& out) const{
   /*
   int total_demand = 0;
@@ -267,6 +277,7 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
   assert(m_writeRequestTable.exist(line_address(address)));
 
   SequencerRequest* request = m_writeRequestTable.lookup(address);
+
   removeRequest(request);
 
   assert((request->ruby_request.type == RubyRequestType_ST) ||
@@ -282,7 +293,7 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
     m_controller->set_atomic(address);
   }
   else if (request->ruby_request.type == RubyRequestType_RMW_Write) {
-    m_controller->clear_atomic();
+    m_controller->clear_atomic(address);
   }
 
   hitCallback(request, data);
@@ -354,47 +365,33 @@ void Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data) {
 }
 
 // Returns true if the sequencer already has a load or store outstanding
-bool Sequencer::isReady(const RubyRequest& request) {
-  // POLINA: check if we are currently flushing the write buffer, if so Ruby is returned as not ready
-  // to simulate stalling of the front-end
-  // Do we stall all the sequencers? If it is atomic instruction - yes!
-  if (m_outstanding_count >= m_max_outstanding_requests) {
-    return false;
-  }
-
-  if( m_writeRequestTable.exist(line_address(Address(request.paddr))) ||
-      m_readRequestTable.exist(line_address(Address(request.paddr))) ){
-    //cout << "OUTSTANDING REQUEST EXISTS " << p << " VER " << m_version << endl;
-    //printProgress(cout);
-    return false;
-  }
-
-  if (m_servicing_atomic != -1 && m_servicing_atomic != (int)request.proc_id) {
-    assert(m_atomics_counter > 0);
-    return false;
-  }
-  else {
-    if (request.type == RubyRequestType_RMW_Read) {
-      if (m_servicing_atomic == -1) {
-        assert(m_atomics_counter == 0);
-        m_servicing_atomic = (int)request.proc_id;
-      }
-      else {
-        assert(m_servicing_atomic == (int)request.proc_id);
-      }
-      m_atomics_counter++;
+int Sequencer::isReady(const RubyRequest& request) {
+  bool is_outstanding_store = m_writeRequestTable.exist(line_address(Address(request.paddr)));
+  bool is_outstanding_load = m_readRequestTable.exist(line_address(Address(request.paddr)));
+  if ( is_outstanding_store ) {
+    if ((request.type == RubyRequestType_LD) ||
+        (request.type == RubyRequestType_IFETCH) ||
+        (request.type == RubyRequestType_RMW_Read)) {
+      m_store_waiting_on_load_cycles++;
+    } else {
+      m_store_waiting_on_store_cycles++;
     }
-    else if (request.type == RubyRequestType_RMW_Write) {
-      assert(m_servicing_atomic == (int)request.proc_id);
-      assert(m_atomics_counter > 0);
-      m_atomics_counter--;
-      if (m_atomics_counter == 0) {
-        m_servicing_atomic = -1;
-      }
+    return LIBRUBY_ALIASED_REQUEST;
+  } else if ( is_outstanding_load ) {
+    if ((request.type == RubyRequestType_ST) ||
+        (request.type == RubyRequestType_RMW_Write) ) {
+      m_load_waiting_on_store_cycles++;
+    } else {
+      m_load_waiting_on_load_cycles++;
     }
+    return LIBRUBY_ALIASED_REQUEST;
   }
 
-  return true;
+  if (m_outstanding_count >= m_max_outstanding_requests) {
+    return LIBRUBY_BUFFER_FULL;
+  }
+  
+  return 1;
 }
 
 bool Sequencer::empty() const {
@@ -405,11 +402,12 @@ bool Sequencer::empty() const {
 int64_t Sequencer::makeRequest(const RubyRequest & request)
 {
   assert(Address(request.paddr).getOffset() + request.len <= RubySystem::getBlockSizeBytes());
-  if (isReady(request)) {
+  int ready = isReady(request);
+  if (ready > 0) {
     int64_t id = makeUniqueRequestID();
     SequencerRequest *srequest = new SequencerRequest(request, id, g_eventQueue_ptr->getTime());
     bool found = insertRequest(srequest);
-    if (!found)
+    if (!found) {
       if (request.type == RubyRequestType_Locked_Write) {
         // NOTE: it is OK to check the locked flag here as the mandatory queue will be checked first
         // ensuring that nothing comes between checking the flag and servicing the store
@@ -420,16 +418,17 @@ int64_t Sequencer::makeRequest(const RubyRequest & request)
           m_dataCache_ptr->clearLocked(line_address(Address(request.paddr)));
         }
       }
-      if (request.type == RubyRequestType_RMW_Write) {
-        m_controller->started_writes();
-      }
       issueRequest(request);
 
-    // TODO: issue hardware prefetches here
-    return id;
-  }
-  else {
-    return -1;
+      // TODO: issue hardware prefetches here
+      return id;
+    }
+    else {
+      assert(0);
+      return 0;
+    }
+  } else {
+    return ready;
   }
 }
 
@@ -439,24 +438,61 @@ void Sequencer::issueRequest(const RubyRequest& request) {
   CacheRequestType ctype;
   switch(request.type) {
   case RubyRequestType_IFETCH:
+    if (m_atomic_reads > 0 && m_atomic_writes == 0) {
+      m_controller->reset_atomics();
+      m_atomic_writes = 0;
+      m_atomic_reads = 0;
+    }
+    else if (m_atomic_writes > 0) {
+      assert(m_atomic_reads > m_atomic_writes);
+      cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
+      assert(false);
+    }
     ctype = CacheRequestType_IFETCH;
     break;
   case RubyRequestType_LD:
+    if (m_atomic_reads > 0 && m_atomic_writes == 0) {
+      m_controller->reset_atomics();
+      m_atomic_writes = 0;
+      m_atomic_reads = 0;
+    }
+    else if (m_atomic_writes > 0) {
+      assert(m_atomic_reads > m_atomic_writes);
+      cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
+      assert(false);
+    }
     ctype = CacheRequestType_LD;
     break;
   case RubyRequestType_ST:
+    if (m_atomic_reads > 0 && m_atomic_writes == 0) {
+      m_controller->reset_atomics();
+      m_atomic_writes = 0;
+      m_atomic_reads = 0;
+    }
+    else if (m_atomic_writes > 0) {
+      assert(m_atomic_reads > m_atomic_writes);
+      cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
+      assert(false);
+    }
     ctype = CacheRequestType_ST;
     break;
   case RubyRequestType_Locked_Read:
-    ctype = CacheRequestType_ST;
-    break;
   case RubyRequestType_Locked_Write:
-    ctype = CacheRequestType_ST;
+    ctype = CacheRequestType_ATOMIC;
     break;
   case RubyRequestType_RMW_Read:
+    assert(m_atomic_writes == 0);
+    m_atomic_reads++;
     ctype = CacheRequestType_ATOMIC;
     break;
   case RubyRequestType_RMW_Write:
+    assert(m_atomic_reads > 0);
+    assert(m_atomic_writes < m_atomic_reads);
+    m_atomic_writes++;
+    if (m_atomic_reads == m_atomic_writes) {
+      m_atomic_reads = 0;
+      m_atomic_writes = 0;
+    }
     ctype = CacheRequestType_ATOMIC;
     break;
   default:
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index cf12c2a0b..ce53dd8d7 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -86,10 +86,11 @@ public:
 
   // called by Tester or Simics
   int64_t makeRequest(const RubyRequest & request);
-  bool isReady(const RubyRequest& request);
+  int isReady(const RubyRequest& request);
   bool empty() const;
 
   void print(ostream& out) const;
+  void printStats(ostream & out) const;
   void checkCoherence(const Address& address);
 
   //  bool getRubyMemoryValue(const Address& addr, char* value, unsigned int size_in_bytes);
@@ -127,8 +128,13 @@ private:
   // Global outstanding request count, across all request tables
   int m_outstanding_count;
   bool m_deadlock_check_scheduled;
-  int m_servicing_atomic;
-  int m_atomics_counter;
+  int m_atomic_reads;
+  int m_atomic_writes;
+
+  int m_store_waiting_on_load_cycles;
+  int m_store_waiting_on_store_cycles;
+  int m_load_waiting_on_store_cycles;
+  int m_load_waiting_on_load_cycles;
 };
 
 // Output operator declaration
diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc
index ad67cdc80..4ce919618 100644
--- a/src/mem/ruby/system/System.cc
+++ b/src/mem/ruby/system/System.cc
@@ -335,6 +335,10 @@ void RubySystem::printStats(ostream& out)
 
   m_profiler_ptr->printStats(out);
   m_network_ptr->printStats(out);
+  for (map<string, Sequencer*>::const_iterator it = m_sequencers.begin();
+       it != m_sequencers.end(); it++) {
+    (*it).second->printStats(out);
+  }
   for (map<string, CacheMemory*>::const_iterator it = m_caches.begin();
        it != m_caches.end(); it++) {
     (*it).second->printStats(out);
diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py
index 008438869..af9850896 100644
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -186,8 +186,8 @@ public:
     void printConfig(ostream& out) const;
     void wakeup();
     void set_atomic(Address addr);
-    void started_writes();
-    void clear_atomic();
+    void clear_atomic(Address addr);
+    void reset_atomics();
     void printStats(ostream& out) const { s_profiler.dumpStats(out); }
     void clearStats() { s_profiler.clearStats(); }
 private:
@@ -201,7 +201,6 @@ private:
         if self.ident == "L1Cache":
             code('''
 int servicing_atomic;
-bool started_receiving_writes;
 Address locked_read_request1;
 Address locked_read_request2;
 Address locked_read_request3;
@@ -301,7 +300,6 @@ $c_ident::$c_ident(const string &name)
         if self.ident == "L1Cache":
             code('''
 servicing_atomic = 0;
-started_receiving_writes = false;
 locked_read_request1 = Address(-1);
 locked_read_request2 = Address(-1);
 locked_read_request3 = Address(-1);
@@ -600,93 +598,6 @@ void ${ident}_Controller::wakeup()
             code('// ${ident}InPort $port')
             output = port["c_code_in_port"]
 
-            pos = output.find("TransitionResult result = doTransition((L1Cache_mandatory_request_type_to_event(((*in_msg_ptr)).m_Type)), L1Cache_getState(addr), addr);")
-            assert pos >= 0
-            atomics_string = '''
-if ((((*in_msg_ptr)).m_Type) == CacheRequestType_ATOMIC) {
-    if (servicing_atomic == 0) {
-        if (locked_read_request1 == Address(-1)) {
-            assert(read_counter == 0);
-            locked_read_request1 = addr;
-            assert(read_counter == 0);
-            read_counter++;
-        }
-        else if (addr == locked_read_request1) {
-            ; // do nothing
-        }
-        else {
-            assert(0); // should never be here if servicing one request at a time
-        }
-    }
-    else if (!started_receiving_writes) {
-        if (servicing_atomic == 1) {
-            if (locked_read_request2 == Address(-1)) {
-                assert(locked_read_request1 != Address(-1));
-                assert(read_counter == 1);
-                locked_read_request2 = addr;
-                assert(read_counter == 1);
-                read_counter++;
-            }
-            else if (addr == locked_read_request2) {
-                ; // do nothing
-            }
-            else {
-                assert(0); // should never be here if servicing one request at a time
-            }
-        }
-        else if (servicing_atomic == 2) {
-            if (locked_read_request3 == Address(-1)) {
-                assert(locked_read_request1 != Address(-1));
-                assert(locked_read_request2 != Address(-1));
-                assert(read_counter == 1);
-                locked_read_request3 = addr;
-                assert(read_counter == 2);
-                read_counter++;
-            }
-            else if (addr == locked_read_request3) {
-                ; // do nothing
-            }
-            else {
-                assert(0); // should never be here if servicing one request at a time
-            }
-        }
-        else if (servicing_atomic == 3) {
-            if (locked_read_request4 == Address(-1)) {
-                assert(locked_read_request1 != Address(-1));
-                assert(locked_read_request2 != Address(-1));
-                assert(locked_read_request3 != Address(-1));
-                assert(read_counter == 1);
-                locked_read_request4 = addr;
-                assert(read_counter == 3);
-                read_counter++;
-            }
-            else if (addr == locked_read_request4) {
-                ; // do nothing
-            }
-            else {
-                assert(0); // should never be here if servicing one request at a time
-            }
-        }
-        else {
-            assert(0);
-        }
-    }
-}
-else {
-    if (servicing_atomic > 0) {
-        // reset
-        servicing_atomic = 0;
-        read_counter = 0;
-        started_receiving_writes = false;
-        locked_read_request1 = Address(-1);
-        locked_read_request2 = Address(-1);
-        locked_read_request3 = Address(-1);
-        locked_read_request4 = Address(-1);
-    }
-}
-'''
-
-            output = output[:pos] + atomics_string + output[pos:]
             code('$output')
 
         for port in self.in_ports:
@@ -695,17 +606,14 @@ else {
                 continue
 
             if ident == "L1Cache":
-                if str(port).find("forwardRequestNetwork_in") >= 0:
+                if (str(port).find("forwardRequestNetwork_in") >= 0 or str(port).find("requestNetwork_in") >= 0 or str(port).find("requestIntraChipL1Network_in") >= 0):
                     code('''
 bool postpone = false;
 if ((((*m_L1Cache_forwardToCache_ptr)).isReady())) {
     const RequestMsg* in_msg_ptr;
     in_msg_ptr = dynamic_cast<const RequestMsg*>(((*m_L1Cache_forwardToCache_ptr)).peek());
-    if ((((servicing_atomic == 1)  && (locked_read_request1 == ((*in_msg_ptr)).m_Address)) || 
-         ((servicing_atomic == 2)  && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address)) || 
-         ((servicing_atomic == 3)  && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address || locked_read_request3 == ((*in_msg_ptr)).m_Address)) || 
-         ((servicing_atomic == 4)  && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address || locked_read_request3 == ((*in_msg_ptr)).m_Address || locked_read_request1 == ((*in_msg_ptr)).m_Address)))) {
-    postpone = true;
+    if ((((servicing_atomic > 0)  && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address || locked_read_request3 == ((*in_msg_ptr)).m_Address || locked_read_request4 == ((*in_msg_ptr)).m_Address)))) {
+            postpone = true;
     }
 }
 if (!postpone) {
@@ -716,7 +624,7 @@ if (!postpone) {
             code.dedent()
 
             if ident == "L1Cache":
-                if str(port).find("forwardRequestNetwork_in") >= 0:
+                if (str(port).find("forwardRequestNetwork_in") >= 0 or str(port).find("requestNetwork_in") >= 0 or str(port).find("requestIntraChipL1Network_in") >= 0):
                     code.dedent()
                     code('}')
                     code.indent()
@@ -735,31 +643,62 @@ if (!postpone) {
 void ${ident}_Controller::set_atomic(Address addr)
 {
     servicing_atomic++; 
+    switch (servicing_atomic) { 
+      case(1):
+        assert(locked_read_request1 == Address(-1));
+        locked_read_request1 = addr;
+        break;
+      case(2):
+        assert(locked_read_request2 == Address(-1));
+        locked_read_request2 = addr;
+        break;
+      case(3):
+        assert(locked_read_request3 == Address(-1));
+        locked_read_request3 = addr;
+        break;
+      case(4):
+        assert(locked_read_request4 == Address(-1));
+        locked_read_request4 = addr;
+        break;
+      default:
+        assert(0);
+
+    }    
 }
 
-void ${ident}_Controller::started_writes()
+void ${ident}_Controller::clear_atomic(Address addr)
 {
-    started_receiving_writes = true; 
+
+    assert(servicing_atomic > 0);
+    if (addr == locked_read_request1)
+        locked_read_request1 = Address(-1);
+    else if (addr == locked_read_request2)
+        locked_read_request2 = Address(-1);
+    else if (addr == locked_read_request3)
+        locked_read_request3 = Address(-1);
+    else if (addr == locked_read_request4)
+        locked_read_request4 = Address(-1);
+    else
+       assert(0);
+    servicing_atomic--;
+
 }
 
-void ${ident}_Controller::clear_atomic()
+void ${ident}_Controller::reset_atomics()
 {
-    assert(servicing_atomic > 0); 
-    read_counter--; 
-    servicing_atomic--; 
-    if (read_counter == 0) { 
-        servicing_atomic = 0; 
-        started_receiving_writes = false; 
-        locked_read_request1 = Address(-1); 
-        locked_read_request2 = Address(-1); 
-        locked_read_request3 = Address(-1); 
-        locked_read_request4 = Address(-1); 
-    } 
+
+    servicing_atomic = 0;
+    locked_read_request1 = Address(-1);
+    locked_read_request2 = Address(-1);
+    locked_read_request3 = Address(-1);
+    locked_read_request4 = Address(-1);
+
 }
+
 ''')
         else:
             code('''
-void ${ident}_Controller::started_writes()
+void ${ident}_Controller::reset_atomics()
 {
     assert(0); 
 }
@@ -769,7 +708,7 @@ void ${ident}_Controller::set_atomic(Address addr)
     assert(0); 
 }
 
-void ${ident}_Controller::clear_atomic()
+void ${ident}_Controller::clear_atomic(Address addr)
 {
     assert(0); 
 }
diff --git a/src/mem/slicc/symbols/Type.py b/src/mem/slicc/symbols/Type.py
index 2541296dc..bafc6ea9e 100644
--- a/src/mem/slicc/symbols/Type.py
+++ b/src/mem/slicc/symbols/Type.py
@@ -430,8 +430,10 @@ enum ${{self.c_ident}} {
         # For each field
         for i,(ident,enum) in enumerate(self.enums.iteritems()):
             desc = enum.get("desc", "No description avaliable")
-            init = ' = %s_FIRST' % self.c_ident if i == 0 else ''
-
+            if i == 0: 
+                init = ' = %s_FIRST' % self.c_ident 
+            else:
+                init = ''
             code('${{self.c_ident}}_${{enum.ident}}$init, /**< $desc */')
         code.dedent()
         code('''
diff --git a/tests/configs/memtest-ruby.py b/tests/configs/memtest-ruby.py
index c564ec600..d2be2791e 100644
--- a/tests/configs/memtest-ruby.py
+++ b/tests/configs/memtest-ruby.py
@@ -35,10 +35,7 @@ nb_cores = 8
 cpus = [ MemTest() for i in xrange(nb_cores) ]
 
 import ruby_config
-ruby_memory = ruby_config.generate("MI_example-homogeneous.rb",
-                                   cores = nb_cores,
-                                   cache_size = 256,
-                                   cache_assoc = 2)
+ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", nb_cores)
 
 # system simulated
 system = System(cpu = cpus, funcmem = PhysicalMemory(),
diff --git a/tests/configs/ruby_config.py b/tests/configs/ruby_config.py
index fec7bd36c..190337e67 100644
--- a/tests/configs/ruby_config.py
+++ b/tests/configs/ruby_config.py
@@ -8,10 +8,11 @@ from m5.params import *
 
 def generate(config_file, cores=1, memories=1, memory_size=1024, \
              cache_size=32768, cache_assoc=8, dmas=1,
-             ruby_tick='1t', ports_per_cpu=2):
+             ruby_tick='1t', ports_per_cpu=2, protocol='MOESI_CMP_directory'):
     default = joinpath(dirname(__file__), '../../src/mem/ruby/config')
     ruby_config = os.environ.get('RUBY_CONFIG', default)
     args = [ "ruby", "-I", ruby_config, joinpath(ruby_config, "print_cfg.rb"),
+             "-c", str(protocol),
              "-r", joinpath(ruby_config, config_file), "-p", str(cores),
              "-m", str(memories), "-s", str(memory_size), "-C", str(cache_size),
              "-A", str(cache_assoc), "-D", str(dmas)]
diff --git a/util/style.py b/util/style.py
index 461573941..2c2714f0c 100644
--- a/util/style.py
+++ b/util/style.py
@@ -65,23 +65,23 @@ def whitespace_file(filename):
     if filename.startswith("SCons"):
         return True
 
-    return False
+    return True 
 
 format_types = ( 'C', 'C++' )
 def format_file(filename):
     if file_type(filename) in format_types:
         return True
 
-    return False
+    return True 
 
 def checkwhite_line(line):
     match = lead.search(line)
     if match and match.group(1).find('\t') != -1:
-        return False
+        return True 
 
     match = trail.search(line)
     if match:
-        return False
+        return True
 
     return True