diff options
author | Derek Hower <drh5@cs.wisc.edu> | 2010-01-22 17:23:21 -0600 |
---|---|---|
committer | Derek Hower <drh5@cs.wisc.edu> | 2010-01-22 17:23:21 -0600 |
commit | 589218168c5ae1ed143372e43dbc468369a1bb8f (patch) | |
tree | 5e5154c08e7def83b49b16769c213b0e78ea41fb /src | |
parent | 1c448e2ab00acb05d368c9de62c5cf08e64c6213 (diff) | |
parent | f7de30ab1a9e1655de8bf7d4c15007a682a2a629 (diff) | |
download | gem5-589218168c5ae1ed143372e43dbc468369a1bb8f.tar.xz |
Automated merge with ssh://hg@m5sim.org/m5
Diffstat (limited to 'src')
40 files changed, 531 insertions, 1533 deletions
diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm index 32669190f..39ede67ca 100644 --- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm @@ -27,12 +27,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * $Id: MSI_MOSI_CMP_directory-L1cache.sm 1.10 05/01/19 15:55:40-06:00 beckmann@s0-28.cs.wisc.edu $ - * - */ - - machine(L1Cache, "MSI Directory L1 Cache CMP") : int l1_request_latency, int l1_response_latency, @@ -47,15 +41,15 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") // a local L1 -> this L2 bank, currently ordered with directory forwarded requests MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false"; // a local L1 -> this L2 bank - MessageBuffer responseFromL1Cache, network="To", virtual_network="3", ordered="false"; - MessageBuffer unblockFromL1Cache, network="To", virtual_network="4", ordered="false"; + MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false"; + MessageBuffer unblockFromL1Cache, network="To", virtual_network="2", ordered="false"; // To this node's L1 cache FROM the network // a L2 bank -> this L1 - MessageBuffer requestToL1Cache, network="From", virtual_network="1", ordered="false"; + MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false"; // a L2 bank -> this L1 - MessageBuffer responseToL1Cache, network="From", virtual_network="3", ordered="false"; + MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false"; // STATES enumeration(State, desc="Cache states", default="L1Cache_State_I") { @@ -244,7 +238,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") // Response IntraChip L1 Network - response msg to this L1 cache in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache) { if (responseIntraChipL1Network_in.isReady()) { - peek(responseIntraChipL1Network_in, ResponseMsg) { + peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Address") { assert(in_msg.Destination.isElement(machineID)); if(in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) { trigger(Event:Data_Exclusive, in_msg.Address); @@ -277,7 +271,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") // Request InterChip network - request from this L1 cache to the shared L2 in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache) { if(requestIntraChipL1Network_in.isReady()) { - peek(requestIntraChipL1Network_in, RequestMsg) { + peek(requestIntraChipL1Network_in, RequestMsg, block_on="Address") { assert(in_msg.Destination.isElement(machineID)); if (in_msg.Type == CoherenceRequestType:INV) { trigger(Event:Inv, in_msg.Address); @@ -298,7 +292,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") // Mandatory Queue betweens Node's CPU and it's L1 caches in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") { if (mandatoryQueue_in.isReady()) { - peek(mandatoryQueue_in, CacheMsg) { + peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") { // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache diff --git a/src/mem/protocol/MESI_CMP_directory-L2cache.sm b/src/mem/protocol/MESI_CMP_directory-L2cache.sm index 6439e4fb3..b82d77ddb 100644 --- a/src/mem/protocol/MESI_CMP_directory-L2cache.sm +++ b/src/mem/protocol/MESI_CMP_directory-L2cache.sm @@ -40,14 +40,14 @@ machine(L2Cache, "MESI Directory L2 Cache CMP") // L2 BANK QUEUES // From local bank of L2 cache TO the network - MessageBuffer DirRequestFromL2Cache, network="To", virtual_network="2", ordered="false"; // this L2 bank -> Memory - MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="1", ordered="false"; // this L2 bank -> a local L1 - MessageBuffer responseFromL2Cache, network="To", virtual_network="3", ordered="false"; // this L2 bank -> a local L1 || Memory + MessageBuffer DirRequestFromL2Cache, network="To", virtual_network="0", ordered="false"; // this L2 bank -> Memory + MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="0", ordered="false"; // this L2 bank -> a local L1 + MessageBuffer responseFromL2Cache, network="To", virtual_network="1", ordered="false"; // this L2 bank -> a local L1 || Memory // FROM the network to this local bank of L2 cache - MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false"; // a local L1 || Memory -> this L2 bank + MessageBuffer unblockToL2Cache, network="From", virtual_network="2", ordered="false"; // a local L1 || Memory -> this L2 bank MessageBuffer L1RequestToL2Cache, network="From", virtual_network="0", ordered="false"; // a local L1 -> this L2 bank - MessageBuffer responseToL2Cache, network="From", virtual_network="3", ordered="false"; // a local L1 || Memory -> this L2 bank + MessageBuffer responseToL2Cache, network="From", virtual_network="1", ordered="false"; // a local L1 || Memory -> this L2 bank // MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false"; // a local L1 || Memory -> this L2 bank // STATES diff --git a/src/mem/protocol/MESI_CMP_directory-dma.sm b/src/mem/protocol/MESI_CMP_directory-dma.sm index 191df5dfa..143c465ef 100644 --- a/src/mem/protocol/MESI_CMP_directory-dma.sm +++ b/src/mem/protocol/MESI_CMP_directory-dma.sm @@ -3,8 +3,8 @@ machine(DMA, "DMA Controller") : int request_latency { - MessageBuffer responseFromDir, network="From", virtual_network="6", ordered="true", no_vector="true"; - MessageBuffer reqToDirectory, network="To", virtual_network="7", ordered="false", no_vector="true"; + MessageBuffer responseFromDir, network="From", virtual_network="1", ordered="true", no_vector="true"; + MessageBuffer reqToDirectory, network="To", virtual_network="0", ordered="false", no_vector="true"; enumeration(State, desc="DMA states", default="DMA_State_READY") { READY, desc="Ready to accept a new request"; @@ -51,13 +51,13 @@ machine(DMA, "DMA Controller") } } - in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") { + in_port(dmaResponseQueue_in, ResponseMsg, responseFromDir, desc="...") { if (dmaResponseQueue_in.isReady()) { - peek( dmaResponseQueue_in, DMAResponseMsg) { - if (in_msg.Type == DMAResponseType:ACK) { - trigger(Event:Ack, in_msg.LineAddress); - } else if (in_msg.Type == DMAResponseType:DATA) { - trigger(Event:Data, in_msg.LineAddress); + peek( dmaResponseQueue_in, ResponseMsg) { + if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:Ack, makeLineAddress(in_msg.Address)); + } else if (in_msg.Type == CoherenceResponseType:DATA) { + trigger(Event:Data, makeLineAddress(in_msg.Address)); } else { error("Invalid response type"); } @@ -67,10 +67,9 @@ machine(DMA, "DMA Controller") action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") { peek(dmaRequestQueue_in, SequencerMsg) { - enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { - out_msg.PhysicalAddress := in_msg.PhysicalAddress; - out_msg.LineAddress := in_msg.LineAddress; - out_msg.Type := DMARequestType:READ; + enqueue(reqToDirectory_out, RequestMsg, latency=request_latency) { + out_msg.Address := in_msg.PhysicalAddress; + out_msg.Type := CoherenceRequestType:DMA_READ; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; out_msg.Destination.add(map_Address_to_Directory(address)); @@ -81,10 +80,9 @@ machine(DMA, "DMA Controller") action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") { peek(dmaRequestQueue_in, SequencerMsg) { - enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { - out_msg.PhysicalAddress := in_msg.PhysicalAddress; - out_msg.LineAddress := in_msg.LineAddress; - out_msg.Type := DMARequestType:WRITE; + enqueue(reqToDirectory_out, RequestMsg, latency=request_latency) { + out_msg.Address := in_msg.PhysicalAddress; + out_msg.Type := CoherenceRequestType:DMA_WRITE; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; out_msg.Destination.add(map_Address_to_Directory(address)); @@ -94,13 +92,11 @@ machine(DMA, "DMA Controller") } action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { - peek (dmaResponseQueue_in, DMAResponseMsg) { - dma_sequencer.ackCallback(); - } + dma_sequencer.ackCallback(); } action(d_dataCallback, "d", desc="Write data to dma sequencer") { - peek (dmaResponseQueue_in, DMAResponseMsg) { + peek (dmaResponseQueue_in, ResponseMsg) { dma_sequencer.dataCallback(in_msg.DataBlk); } } diff --git a/src/mem/protocol/MESI_CMP_directory-mem.sm b/src/mem/protocol/MESI_CMP_directory-mem.sm index f5a2e431b..7e30883b0 100644 --- a/src/mem/protocol/MESI_CMP_directory-mem.sm +++ b/src/mem/protocol/MESI_CMP_directory-mem.sm @@ -40,13 +40,11 @@ machine(Directory, "MESI_CMP_filter_directory protocol") int directory_latency { - MessageBuffer requestToDir, network="From", virtual_network="2", ordered="false"; - MessageBuffer responseToDir, network="From", virtual_network="3", ordered="false"; - MessageBuffer responseFromDir, network="To", virtual_network="3", ordered="false"; - - MessageBuffer dmaRequestFromDir, network="To", virtual_network="6", ordered="true"; - MessageBuffer dmaRequestToDir, network="From", virtual_network="7", ordered="true"; + MessageBuffer requestToDir, network="From", virtual_network="0", ordered="false"; + MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false"; + MessageBuffer requestFromDir, network="To", virtual_network="0", ordered="false"; + MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; // STATES enumeration(State, desc="Directory states", default="Directory_State_I") { @@ -118,9 +116,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol") // DirectoryMemory directory, constructor_hack="i"; // MemoryControl memBuffer, constructor_hack="i"; - DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])'; + DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory"])'; - MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])'; + MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_control"])'; TBETable TBEs, template_hack="<Directory_TBE>"; @@ -167,32 +165,19 @@ machine(Directory, "MESI_CMP_filter_directory protocol") // ** OUT_PORTS ** out_port(responseNetwork_out, ResponseMsg, responseFromDir); out_port(memQueue_out, MemoryMsg, memBuffer); - out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaRequestFromDir); // ** IN_PORTS ** -//added by SS for dma - in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) { - if (dmaRequestQueue_in.isReady()) { - peek(dmaRequestQueue_in, DMARequestMsg) { - if (in_msg.Type == DMARequestType:READ) { - trigger(Event:DMA_READ, in_msg.LineAddress); - } else if (in_msg.Type == DMARequestType:WRITE) { - trigger(Event:DMA_WRITE, in_msg.LineAddress); - } else { - error("Invalid message"); - } - } - } - } - - in_port(requestNetwork_in, RequestMsg, requestToDir) { if (requestNetwork_in.isReady()) { peek(requestNetwork_in, RequestMsg) { assert(in_msg.Destination.isElement(machineID)); if (isGETRequest(in_msg.Type)) { trigger(Event:Fetch, in_msg.Address); + } else if (in_msg.Type == CoherenceRequestType:DMA_READ) { + trigger(Event:DMA_READ, makeLineAddress(in_msg.Address)); + } else if (in_msg.Type == CoherenceRequestType:DMA_WRITE) { + trigger(Event:DMA_WRITE, makeLineAddress(in_msg.Address)); } else { DEBUG_EXPR(in_msg); error("Invalid message"); @@ -328,7 +313,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") } //added by SS for dma action(qf_queueMemoryFetchRequestDMA, "qfd", desc="Queue off-chip fetch request") { - peek(dmaRequestQueue_in, DMARequestMsg) { + peek(requestNetwork_in, RequestMsg) { enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) { out_msg.Address := address; out_msg.Type := MemoryRequestType:MEMORY_READ; @@ -342,14 +327,14 @@ machine(Directory, "MESI_CMP_filter_directory protocol") } action(p_popIncomingDMARequestQueue, "p", desc="Pop incoming DMA queue") { - dmaRequestQueue_in.dequeue(); + requestNetwork_in.dequeue(); } action(dr_sendDMAData, "dr", desc="Send Data to DMA controller from directory") { peek(memQueue_in, MemoryMsg) { - enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) { - out_msg.PhysicalAddress := address; - out_msg.Type := DMAResponseType:DATA; + enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA; out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be out_msg.Destination.add(map_Address_to_DMA(address)); out_msg.MessageSize := MessageSizeType:Response_Data; @@ -358,15 +343,13 @@ machine(Directory, "MESI_CMP_filter_directory protocol") } action(dw_writeDMAData, "dw", desc="DMA Write data to memory") { - peek(dmaRequestQueue_in, DMARequestMsg) { - //directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, in_msg.Offset, in_msg.Len); - - directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.PhysicalAddress), in_msg.Len); + peek(requestNetwork_in, RequestMsg) { + directory[address].DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.Address), in_msg.Len); } } action(qw_queueMemoryWBRequest_partial, "qwp", desc="Queue off-chip writeback request") { - peek(dmaRequestQueue_in, DMARequestMsg) { + peek(requestNetwork_in, RequestMsg) { enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) { out_msg.Address := address; out_msg.Type := MemoryRequestType:MEMORY_WB; @@ -384,9 +367,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol") } action(da_sendDMAAck, "da", desc="Send Ack to DMA controller") { - enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) { - out_msg.PhysicalAddress := address; - out_msg.Type := DMAResponseType:ACK; + enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:ACK; out_msg.Destination.add(map_Address_to_DMA(address)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } @@ -397,7 +380,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") } action(zz_recycleDMAQueue, "zz", desc="recycle DMA queue") { - dmaRequestQueue_in.recycle(); + requestNetwork_in.recycle(); } @@ -410,12 +393,12 @@ machine(Directory, "MESI_CMP_filter_directory protocol") action(inv_sendCacheInvalidate, "inv", desc="Invalidate a cache block") { - peek(dmaRequestQueue_in, DMARequestMsg) { + peek(requestNetwork_in, RequestMsg) { enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:INV; out_msg.Sender := machineID; - out_msg.Destination := directory[in_msg.PhysicalAddress].Owner; + out_msg.Destination := directory[address].Owner; out_msg.MessageSize := MessageSizeType:Response_Control; } } @@ -424,9 +407,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol") action(drp_sendDMAData, "drp", desc="Send Data to DMA controller from incoming PUTX") { peek(responseNetwork_in, ResponseMsg) { - enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) { - out_msg.PhysicalAddress := address; - out_msg.Type := DMAResponseType:DATA; + enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA; out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be out_msg.Destination.add(map_Address_to_DMA(address)); out_msg.MessageSize := MessageSizeType:Response_Data; @@ -439,10 +422,10 @@ machine(Directory, "MESI_CMP_filter_directory protocol") } action(v_allocateTBE, "v", desc="Allocate TBE") { - peek(dmaRequestQueue_in, DMARequestMsg) { + peek(requestNetwork_in, RequestMsg) { TBEs.allocate(address); TBEs[address].DataBlk := in_msg.DataBlk; - TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; + TBEs[address].PhysicalAddress := in_msg.Address; TBEs[address].Len := in_msg.Len; } } @@ -514,7 +497,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") //added by SS for dma support transition(I, DMA_READ, ID) { qf_queueMemoryFetchRequestDMA; - p_popIncomingDMARequestQueue; + j_popIncomingRequestQueue; } transition(ID, Memory_Data, I) { @@ -525,7 +508,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") transition(I, DMA_WRITE, ID_W) { dw_writeDMAData; qw_queueMemoryWBRequest_partial; - p_popIncomingDMARequestQueue; + j_popIncomingRequestQueue; } transition(ID_W, Memory_Ack, I) { @@ -544,7 +527,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") transition(M, DMA_READ, M_DRD) { inv_sendCacheInvalidate; - p_popIncomingDMARequestQueue; + j_popIncomingRequestQueue; } transition(M_DRD, Data, M_DRDI) { @@ -563,7 +546,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") transition(M, DMA_WRITE, M_DWR) { v_allocateTBE; inv_sendCacheInvalidate; - p_popIncomingDMARequestQueue; + j_popIncomingRequestQueue; } transition(M_DWR, Data, M_DWRI) { diff --git a/src/mem/protocol/MESI_CMP_directory-msg.sm b/src/mem/protocol/MESI_CMP_directory-msg.sm index 15934e6b2..74c1e56f0 100644 --- a/src/mem/protocol/MESI_CMP_directory-msg.sm +++ b/src/mem/protocol/MESI_CMP_directory-msg.sm @@ -70,7 +70,8 @@ enumeration(CoherenceRequestType, desc="...") { WB_NACK, desc="Writeback neg. ack"; FWD, desc="Generic FWD"; - + DMA_READ, desc="DMA Read"; + DMA_WRITE, desc="DMA Write"; } // CoherenceResponseType @@ -95,6 +96,7 @@ structure(RequestMsg, desc="...", interface="NetworkMessage") { NetDest Destination, desc="What components receive the request, includes MachineType and num"; MessageSizeType MessageSize, desc="size category of the message"; DataBlock DataBlk, desc="Data for the cache line (if PUTX)"; + int Len; bool Dirty, default="false", desc="Dirty bit"; PrefetchBit Prefetch, desc="Is this a prefetch request"; } @@ -111,68 +113,4 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") { MessageSizeType MessageSize, desc="size category of the message"; } -enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") { - READ, desc="Memory Read"; - WRITE, desc="Memory Write"; - NULL, desc="Invalid"; -} - -enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") { - DATA, desc="DATA read"; - ACK, desc="ACK write"; - NULL, desc="Invalid"; -} - -structure(DMARequestMsg, desc="...", interface="NetworkMessage") { - DMARequestType Type, desc="Request type (read/write)"; - Address PhysicalAddress, desc="Physical address for this request"; - Address LineAddress, desc="Line address for this request"; - NetDest Destination, desc="Destination"; - DataBlock DataBlk, desc="DataBlk attached to this request"; - int Offset, desc="The offset into the datablock"; - int Len, desc="The length of the request"; - MessageSizeType MessageSize, desc="size category of the message"; -} - -structure(DMAResponseMsg, desc="...", interface="NetworkMessage") { - DMAResponseType Type, desc="Response type (DATA/ACK)"; - Address PhysicalAddress, desc="Physical address for this request"; - Address LineAddress, desc="Line address for this request"; - NetDest Destination, desc="Destination"; - DataBlock DataBlk, desc="DataBlk attached to this request"; - MessageSizeType MessageSize, desc="size category of the message"; -} - - - -/* -GenericRequestType convertToGenericType(CoherenceRequestType type) { - if(type == CoherenceRequestType:PUTX) { - return GenericRequestType:PUTX; - } else if(type == CoherenceRequestType:GETS) { - return GenericRequestType:GETS; - } else if(type == CoherenceRequestType:GET_INSTR) { - return GenericRequestType:GET_INSTR; - } else if(type == CoherenceRequestType:GETX) { - return GenericRequestType:GETX; - } else if(type == CoherenceRequestType:UPGRADE) { - return GenericRequestType:UPGRADE; - } else if(type == CoherenceRequestType:PUTS) { - return GenericRequestType:PUTS; - } else if(type == CoherenceRequestType:INV) { - return GenericRequestType:INV; - } else if(type == CoherenceRequestType:INV_S) { - return GenericRequestType:INV_S; - } else if(type == CoherenceRequestType:L1_DG) { - return GenericRequestType:DOWNGRADE; - } else if(type == CoherenceRequestType:WB_ACK) { - return GenericRequestType:WB_ACK; - } else if(type == CoherenceRequestType:EXE_ACK) { - return GenericRequestType:EXE_ACK; - } else { - DEBUG_EXPR(type); - error("invalid CoherenceRequestType"); - } -} -*/ diff --git a/src/mem/protocol/MI_example-cache.sm b/src/mem/protocol/MI_example-cache.sm index 915a0eb99..2f637e7b7 100644 --- a/src/mem/protocol/MI_example-cache.sm +++ b/src/mem/protocol/MI_example-cache.sm @@ -17,6 +17,7 @@ machine(L1Cache, "MI Example L1 Cache") II, desc="Not Present/Invalid, issued PUT"; M, desc="Modified"; MI, desc="Modified, issued PUT"; + MII, desc="Modified, issued PUTX, received nack"; IS, desc="Issued request for LOAD/IFETCH"; IM, desc="Issued request for STORE/ATOMIC"; @@ -137,7 +138,7 @@ machine(L1Cache, "MI Example L1 Cache") in_port(forwardRequestNetwork_in, RequestMsg, forwardToCache) { if (forwardRequestNetwork_in.isReady()) { - peek(forwardRequestNetwork_in, RequestMsg) { + peek(forwardRequestNetwork_in, RequestMsg, block_on="Address") { if (in_msg.Type == CoherenceRequestType:GETX) { trigger(Event:Fwd_GETX, in_msg.Address); } @@ -159,7 +160,7 @@ machine(L1Cache, "MI Example L1 Cache") in_port(responseNetwork_in, ResponseMsg, responseToCache) { if (responseNetwork_in.isReady()) { - peek(responseNetwork_in, ResponseMsg) { + peek(responseNetwork_in, ResponseMsg, block_on="Address") { if (in_msg.Type == CoherenceResponseType:DATA) { trigger(Event:Data, in_msg.Address); } @@ -173,7 +174,7 @@ machine(L1Cache, "MI Example L1 Cache") // Mandatory Queue in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") { if (mandatoryQueue_in.isReady()) { - peek(mandatoryQueue_in, CacheMsg) { + peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") { if (cacheMemory.isTagPresent(in_msg.LineAddress) == false && @@ -388,6 +389,16 @@ machine(L1Cache, "MI Example L1 Cache") o_popForwardedRequestQueue; } + transition(MI, Writeback_Nack, MII) { + o_popForwardedRequestQueue; + } + + transition(MII, Fwd_GETX, I) { + ee_sendDataFromTBE; + w_deallocateTBE; + o_popForwardedRequestQueue; + } + transition(II, Writeback_Nack, I) { w_deallocateTBE; o_popForwardedRequestQueue; diff --git a/src/mem/protocol/MI_example-dir.sm b/src/mem/protocol/MI_example-dir.sm index 0061a2838..1f64d25df 100644 --- a/src/mem/protocol/MI_example-dir.sm +++ b/src/mem/protocol/MI_example-dir.sm @@ -83,9 +83,9 @@ machine(Directory, "Directory protocol") } // ** OBJECTS ** - DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])'; + DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory"])'; - MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])'; + MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_control"])'; TBETable TBEs, template_hack="<Directory_TBE>"; diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm index db2efd3e7..f6b1d4f38 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -306,7 +306,7 @@ machine(L1Cache, "Directory protocol") // Request Network in_port(requestNetwork_in, RequestMsg, requestToL1Cache) { if (requestNetwork_in.isReady()) { - peek(requestNetwork_in, RequestMsg) { + peek(requestNetwork_in, RequestMsg, block_on="Address") { assert(in_msg.Destination.isElement(machineID)); DEBUG_EXPR("MRM_DEBUG: L1 received"); DEBUG_EXPR(in_msg.Type); @@ -338,7 +338,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT // Response Network in_port(responseToL1Cache_in, ResponseMsg, responseToL1Cache) { if (responseToL1Cache_in.isReady()) { - peek(responseToL1Cache_in, ResponseMsg) { + peek(responseToL1Cache_in, ResponseMsg, block_on="Address") { if (in_msg.Type == CoherenceResponseType:ACK) { trigger(Event:Ack, in_msg.Address); } else if (in_msg.Type == CoherenceResponseType:DATA) { @@ -356,7 +356,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT // Mandatory Queue betweens Node's CPU and it's L1 caches in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") { if (mandatoryQueue_in.isReady()) { - peek(mandatoryQueue_in, CacheMsg) { + peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") { // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache diff --git a/src/mem/protocol/MOESI_CMP_directory-dir.sm b/src/mem/protocol/MOESI_CMP_directory-dir.sm index 8e48fc9ab..9e6cc918d 100644 --- a/src/mem/protocol/MOESI_CMP_directory-dir.sm +++ b/src/mem/protocol/MOESI_CMP_directory-dir.sm @@ -127,8 +127,8 @@ machine(Directory, "Directory protocol") // ** OBJECTS ** - DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])'; - MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])'; + DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory"])'; + MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_control"])'; TBETable TBEs, template_hack="<Directory_TBE>"; State getState(Address addr) { diff --git a/src/mem/protocol/RubySlicc_ComponentMapping.sm b/src/mem/protocol/RubySlicc_ComponentMapping.sm index 2a027554e..891820c46 100644 --- a/src/mem/protocol/RubySlicc_ComponentMapping.sm +++ b/src/mem/protocol/RubySlicc_ComponentMapping.sm @@ -33,6 +33,7 @@ int getNumberOfLastLevelCaches(); // NodeID map_address_to_node(Address addr); MachineID mapAddressToRange(Address addr, MachineType type, int low, int high); +NetDest broadcast(MachineType type); MachineID map_Address_to_DMA(Address addr); MachineID map_Address_to_Directory(Address addr); NodeID map_Address_to_DirectoryNode(Address addr); diff --git a/src/mem/protocol/SConsopts b/src/mem/protocol/SConsopts index 7be9fd97e..10a303681 100644 --- a/src/mem/protocol/SConsopts +++ b/src/mem/protocol/SConsopts @@ -50,7 +50,7 @@ all_protocols = [ 'MOESI_hammer', ] -opt = EnumVariable('PROTOCOL', 'Coherence Protocol for Ruby', 'MI_example', +opt = EnumVariable('PROTOCOL', 'Coherence Protocol for Ruby', 'MOESI_CMP_directory', all_protocols) sticky_vars.AddVariables(opt) diff --git a/src/mem/ruby/buffers/MessageBuffer.hh b/src/mem/ruby/buffers/MessageBuffer.hh index 8440c3335..950423ee5 100644 --- a/src/mem/ruby/buffers/MessageBuffer.hh +++ b/src/mem/ruby/buffers/MessageBuffer.hh @@ -64,6 +64,11 @@ public: (m_prio_heap.peekMin().m_time <= g_eventQueue_ptr->getTime())); } + void delayHead() { + MessageBufferNode node = m_prio_heap.extractMin(); + enqueue(node.m_msgptr, 1); + } + bool areNSlotsAvailable(int n); int getPriority() { return m_priority_rank; } void setPriority(int rank) { m_priority_rank = rank; } diff --git a/src/mem/ruby/config/MESI_CMP_directory.rb b/src/mem/ruby/config/MESI_CMP_directory.rb index 4d9ff30b3..7a9d47f24 100644 --- a/src/mem/ruby/config/MESI_CMP_directory.rb +++ b/src/mem/ruby/config/MESI_CMP_directory.rb @@ -12,8 +12,8 @@ class MESI_CMP_directory_L2CacheController < CacheController def argv() vec = super() vec += " cache " + cache.obj_name - vec += " l2_request_latency "+l2_request_latency.to_s - vec += " l2_response_latency "+l2_response_latency.to_s + vec += " l2_request_latency "+request_latency.to_s + vec += " l2_response_latency "+response_latency.to_s vec += " to_l1_latency "+to_L1_latency.to_s return vec end diff --git a/src/mem/ruby/config/MI_example-homogeneous.rb b/src/mem/ruby/config/MI_example-homogeneous.rb index 1ed81ee42..d409e6782 100644 --- a/src/mem/ruby/config/MI_example-homogeneous.rb +++ b/src/mem/ruby/config/MI_example-homogeneous.rb @@ -13,7 +13,7 @@ RubySystem.reset # default values num_cores = 2 -l1_cache_size_kb = 32768 +l1_cache_size_bytes = 32768 l1_cache_assoc = 8 l1_cache_latency = 1 num_memories = 2 @@ -34,6 +34,13 @@ for i in 0..$*.size-1 do elsif $*[i] == "-m" num_memories = $*[i+1].to_i i = i+1 + elsif $*[i] == "-R" + if $*[i+1] == "rand" + RubySystem.random_seed = "rand" + else + RubySystem.random_seed = $*[i+1].to_i + end + i = i+ 1 elsif $*[i] == "-s" memory_size_mb = $*[i+1].to_i i = i + 1 diff --git a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb index 566055f74..ee22df656 100644 --- a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb +++ b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb @@ -12,13 +12,13 @@ RubySystem.reset # default values num_cores = 2 -l1_icache_size_bytes = 32768 +l1_icache_size_kb = 64 l1_icache_assoc = 8 l1_icache_latency = 1 -l1_dcache_size_bytes = 32768 +l1_dcache_size_kb = 32 l1_dcache_assoc = 8 l1_dcache_latency = 1 -l2_cache_size_bytes = 2048 # total size (sum of all banks) +l2_cache_size_kb = 8192 # total size (sum of all banks) l2_cache_assoc = 16 l2_cache_latency = 12 num_l2_banks = num_cores @@ -26,7 +26,8 @@ num_memories = 1 memory_size_mb = 1024 num_dma = 1 -protocol = "MOESI_CMP_token" +#default protocol +protocol = "MOESI_CMP_directory" # check for overrides @@ -34,59 +35,50 @@ for i in 0..$*.size-1 do if $*[i] == "-c" or $*[i] == "--protocol" i += 1 protocol = $*[i] + elsif $*[i] == "-A" + l1_dcache_size_kb = $*[i+1].to_i + i = i+1 + elsif $*[i] == "-B" + num_l2_banks = $*[i+1].to_i + i = i+1 elsif $*[i] == "-m" num_memories = $*[i+1].to_i i = i+1 elsif $*[i] == "-p" num_cores = $*[i+1].to_i i = i+1 + elsif $*[i] == "-R" + if $*[i+1] == "rand" + RubySystem.random_seed = "rand" + else + RubySystem.random_seed = $*[i+1].to_i + end + i = i+ 1 elsif $*[i] == "-s" memory_size_mb = $*[i+1].to_i i = i + 1 - elsif $*[i] == "-C" - l1_dcache_size_bytes = $*[i+1].to_i - i = i + 1 - elsif $*[i] == "-A" - l1_dcache_assoc = $*[i+1].to_i - i = i + 1 - elsif $*[i] == "-D" - num_dma = $*[i+1].to_i - i = i + 1 end end -n_tokens = num_cores + 1 - net_ports = Array.new iface_ports = Array.new -#assert(protocol == "MESI_CMP_directory", __FILE__+" cannot be used with protocol "+protocol); +assert((protocol == "MESI_CMP_directory" or protocol == "MOESI_CMP_directory"), __FILE__+" cannot be used with protocol '#{protocol}'"); require protocol+".rb" num_cores.times { |n| - icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_bytes, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU") - dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_bytes, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU") + icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb*1024, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU") + dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb*1024, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU") sequencer = Sequencer.new("Sequencer_"+n.to_s, icache, dcache) iface_ports << sequencer - if protocol == "MOESI_CMP_token" - net_ports << MOESI_CMP_token_L1CacheController.new("L1CacheController_"+n.to_s, - "L1Cache", - icache, dcache, - sequencer, - num_l2_banks, - n_tokens) - end - if protocol == "MOESI_CMP_directory" net_ports << MOESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s, "L1Cache", icache, dcache, sequencer, num_l2_banks) - end - - if protocol == "MESI_CMP_directory" + elsif protocol == "MESI_CMP_directory" net_ports << MESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s, "L1Cache", icache, dcache, @@ -95,47 +87,29 @@ num_cores.times { |n| end } num_l2_banks.times { |n| - cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_bytes/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU") - if protocol == "MOESI_CMP_token" - net_ports << MOESI_CMP_token_L2CacheController.new("L2CacheController_"+n.to_s, - "L2Cache", - cache, - n_tokens) - end - + cache = SetAssociativeCache.new("l2u_"+n.to_s, (l2_cache_size_kb*1024)/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU") if protocol == "MOESI_CMP_directory" net_ports << MOESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s, "L2Cache", cache) - end - - if protocol == "MESI_CMP_directory" + elsif protocol == "MESI_CMP_directory" net_ports << MESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s, "L2Cache", cache) end - + net_ports.last.request_latency = l2_cache_latency + 2 + net_ports.last.response_latency = l2_cache_latency + 2 } num_memories.times { |n| directory = DirectoryMemory.new("DirectoryMemory_"+n.to_s, memory_size_mb/num_memories) memory_control = MemoryControl.new("MemoryControl_"+n.to_s) - if protocol == "MOESI_CMP_token" - net_ports << MOESI_CMP_token_DirectoryController.new("DirectoryController_"+n.to_s, - "Directory", - directory, - memory_control, - num_l2_banks) - end - if protocol == "MOESI_CMP_directory" net_ports << MOESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s, "Directory", directory, memory_control) - end - - if protocol == "MESI_CMP_directory" + elsif protocol == "MESI_CMP_directory" net_ports << MESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s, "Directory", directory, @@ -146,19 +120,11 @@ num_memories.times { |n| num_dma.times { |n| dma_sequencer = DMASequencer.new("DMASequencer_"+n.to_s) iface_ports << dma_sequencer - if protocol == "MOESI_CMP_token" - net_ports << MOESI_CMP_token_DMAController.new("DMAController_"+n.to_s, - "DMA", - dma_sequencer) - end - if protocol == "MOESI_CMP_directory" net_ports << MOESI_CMP_directory_DMAController.new("DMAController_"+n.to_s, "DMA", dma_sequencer) - end - - if protocol == "MESI_CMP_directory" + elsif protocol == "MESI_CMP_directory" net_ports << MESI_CMP_directory_DMAController.new("DMAController_"+n.to_s, "DMA", dma_sequencer) diff --git a/src/mem/ruby/config/assert.rb b/src/mem/ruby/config/assert.rb new file mode 100644 index 000000000..cc3e43214 --- /dev/null +++ b/src/mem/ruby/config/assert.rb @@ -0,0 +1,18 @@ +#!/usr/bin/env ruby + +class AssertionFailure < RuntimeError + attr_reader :msg, :output + def initialize(message, out=nil) + @msg = message + @output = out + end +end + +class NotImplementedException < Exception +end + +def assert(condition,message) + unless condition + raise AssertionFailure.new(message), "\n\nAssertion failed: \n\n #{message}\n\n" + end +end diff --git a/src/mem/ruby/config/cfg.rb b/src/mem/ruby/config/cfg.rb index c470ca92f..a20562243 100644 --- a/src/mem/ruby/config/cfg.rb +++ b/src/mem/ruby/config/cfg.rb @@ -1,7 +1,7 @@ #!/usr/bin/ruby -class AssertionFailure < RuntimeError -end +root = File.dirname(File.expand_path(__FILE__)) +require root+'/assert.rb' class Boolean def self.is_a?(obj) @@ -9,22 +9,46 @@ class Boolean end end -def assert(condition,message) - unless condition - raise AssertionFailure, "\n\nAssertion failed: \n\n #{message}\n\n" - end -end - class LibRubyObject @@all_objs = Array.new - attr_reader :obj_name @@default_params = Hash.new + @@param_types = Hash.new + + attr_reader :obj_name def initialize(obj_name) assert obj_name.is_a?(String), "Obj_Name must be a string" @obj_name = obj_name @@all_objs << self @params = Hash.new + + # add all parent parameter accessors if they don't exist + self.class.ancestors.each { |ancestor| + if @@default_params.key?(ancestor.name.to_sym) + @@default_params[ancestor.name.to_sym].each { |p, default| + p = p.to_sym + @params[p] = default + if ! respond_to?(p) + self.class.send(:define_method, p) { + @params[p] = @@default_params[ancestor.name.to_sym][p] if ! @params.key?(p) + return @params[p] + } + end + setter_method_name = (p.to_s + "=").to_sym + if ! respond_to?(setter_method_name) + self.class.send(:define_method, setter_method_name) { |val| + type = @@param_types[ancestor.name.to_sym][p] + if val.is_a?(FalseClass) || val.is_a?(TrueClass) + assert type.is_a?(Boolean), "default value of param \"#{p}\" must be either true or false" + else + assert val.is_a?(type), "default value of param \"#{p}\", which is of type #{val.class.name} does not match expected type #{type}" + end + @params[p] = val + } + end + } + end + } end def cppClassName() @@ -35,40 +59,24 @@ class LibRubyObject idx = self.name.to_sym @@default_params[idx] = Hash.new if ! @@default_params.key?(idx) @@default_params[idx][param_name] = nil - send :define_method, param_name do - @params[param_name] = @@default_params[idx][param_name] if ! @params.key?(param_name) - @params[param_name] - end - method_name = (param_name.to_s + "=").to_sym - send :define_method, method_name do |val| - if val.is_a?(FalseClass) || val.is_a?(TrueClass) - assert type.is_a?(Boolean), "default value of param \"#{param_name}\" must be either true or false" - else - assert val.is_a?(type), "default value of param \"#{param_name}\" does not match type #{type}" - end -# assert val.is_a?(type), "#{param_name} must be of type #{type}" - @params[param_name] = val - end + @@param_types[idx] = Hash.new if ! @@param_types.key?(idx) + @@param_types[idx][param_name] = type end def self.default_param(param_name, type, default) - idx = self.name.to_sym - @@default_params[idx] = Hash.new if ! @@default_params.key?(idx) + if default.is_a?(FalseClass) || default.is_a?(TrueClass) assert type.is_a?(Boolean), "default value of param \"#{param_name}\" must be either true or false" else assert default.is_a?(type), "default value of param \"#{param_name}\" does not match type #{type}" end + + idx = self.name.to_sym + @@default_params[idx] = Hash.new if ! @@default_params.key?(idx) @@default_params[idx][param_name] = default - send :define_method, param_name do - @params[param_name] = @@default_params[idx][param_name] if ! @params.key?(param_name) - @params[param_name] - end - method_name = (param_name.to_s + "=").to_sym - send :define_method, method_name do |val| - assert val.is_a?(type), "#{param_name} must be of type #{type}" - @params[param_name] = val - end + @@param_types[idx] = Hash.new if ! @@param_types.key?(idx) + @@param_types[idx][param_name] = type + end def applyDefaults() @@ -86,6 +94,7 @@ class LibRubyObject @params.each { |key, val| str += key.id2name + " " + assert(val != nil, "parameter #{key} is nil") if val.is_a?(LibRubyObject) str += val.obj_name + " " else @@ -123,36 +132,32 @@ end class NetPort < LibRubyObject attr :mach_type - attr_reader :version + param :version, Integer @@type_cnt = Hash.new - @type_id def initialize(obj_name, mach_type) super(obj_name) @mach_type = mach_type @@type_cnt[mach_type] ||= 0 - @type_id = @@type_cnt[mach_type] + self.version= @@type_cnt[mach_type] # sets the version parameter + @@type_cnt[mach_type] += 1 - idx = "NetPort".to_sym - @@default_params[idx] = Hash.new if ! @@default_params.key?(idx) - @@default_params[idx].each { |key, val| - @params[key] = val if ! @params.key?(key) - } end def port_name mach_type end def port_num - @type_id - end - def cppClassName - "NetPort" + version end def self.totalOfType(mach_type) return @@type_cnt[mach_type] end + def cppClassName() + "generated:"+@mach_type + end + end class MemoryVector < LibRubyObject @@ -161,7 +166,7 @@ class MemoryVector < LibRubyObject end def cppClassName - "MemoryController" + "MemoryVector" end end @@ -296,37 +301,13 @@ private end - - - class CacheController < NetPort - @@total_cache_controllers = Hash.new def initialize(obj_name, mach_type, caches) super(obj_name, mach_type) caches.each { |cache| cache.controller = self } - - if !@@total_cache_controllers.key?(mach_type) - @@total_cache_controllers[mach_type] = 0 - end - @version = @@total_cache_controllers[mach_type] - @@total_cache_controllers[mach_type] += 1 - - # call inhereted parameters - transitions_per_cycle - buffer_size - number_of_TBEs - recycle_latency - end - - def argv() - vec = "version "+@version.to_s - vec += " transitions_per_cycle "+@params[:transitions_per_cycle].to_s - vec += " buffer_size "+@params[:buffer_size].to_s - vec += " number_of_TBEs "+@params[:number_of_TBEs].to_s - vec += " recycle_latency "+@params[:recycle_latency].to_s end def cppClassName() @@ -334,89 +315,92 @@ class CacheController < NetPort end end +class Sequencer < IfacePort +end + class L1CacheController < CacheController - attr :sequencer + param :sequencer, Sequencer def initialize(obj_name, mach_type, caches, sequencer) super(obj_name, mach_type, caches) - @sequencer = sequencer - @sequencer.controller = self - @sequencer.version = @version + sequencer.controller = self + sequencer.version = version + self.sequencer= sequencer end - def argv() - vec = super() - vec += " sequencer "+@sequencer.obj_name - end +# def argv() +# vec = super() +# vec += " sequencer "+@sequencer.obj_name +# end +end + +class DirectoryMemory < LibRubyObject +end +class MemoryControl < LibRubyObject end class DirectoryController < NetPort @@total_directory_controllers = 0 - attr :directory - attr :memory_control + param :directory, DirectoryMemory + param :memory_control, MemoryControl def initialize(obj_name, mach_type, directory, memory_control) super(obj_name, mach_type) - @directory = directory directory.controller = self - - @memory_control = memory_control + directory.version = @@total_directory_controllers + self.directory = directory + self.memory_control = memory_control @version = @@total_directory_controllers @@total_directory_controllers += 1 buffer_size() end - def argv() - "version "+@version.to_s+" directory_name "+@directory.obj_name+" transitions_per_cycle "+@params[:transitions_per_cycle].to_s + " buffer_size "+@params[:buffer_size].to_s + " number_of_TBEs "+@params[:number_of_TBEs].to_s + " memory_controller_name "+@memory_control.obj_name + " recycle_latency "+@params[:recycle_latency].to_s - end - def cppClassName() "generated:"+@mach_type end end +class DMASequencer < IfacePort +end + class DMAController < NetPort @@total_dma_controllers = 0 - attr :dma_sequencer + param :dma_sequencer, DMASequencer + param :version, Integer + def initialize(obj_name, mach_type, dma_sequencer) super(obj_name, mach_type) - @dma_sequencer = dma_sequencer - @version = @@total_dma_controllers - @@total_dma_controllers += 1 dma_sequencer.controller = self - buffer_size - end + dma_sequencer.version = @@total_dma_controllers + self.dma_sequencer = dma_sequencer - def argv() - "version "+@version.to_s+" dma_sequencer "+@dma_sequencer.obj_name+" transitions_per_cycle "+@params[:transitions_per_cycle].to_s + " buffer_size "+@params[:buffer_size].to_s + " number_of_TBEs "+@params[:number_of_TBEs].to_s + " recycle_latency "+@params[:recycle_latency].to_s + self.version = @@total_dma_controllers + @@total_dma_controllers += 1 end - def cppClassName() - "generated:"+@mach_type - end end class Cache < LibRubyObject - attr :size, :latency - attr_writer :controller + param :size, Integer + param :latency, Integer + param :controller, NetPort def initialize(obj_name, size, latency) super(obj_name) - assert size.is_a?(Integer), "Cache size must be an integer" - @size = size - @latency = latency + self.size = size + self.latency = latency + # controller must be set manually by the configuration script + # because there is a cyclic dependence end - def args - "controller "+@controller.obj_name+" size "+@size.to_s+" latency "+@latency.to_s - end end class SetAssociativeCache < Cache - attr :assoc, :replacement_policy + param :assoc, Integer + param :replacement_policy, String # latency can be either an integer, a float, or the string "auto" # when an integer, it represents the number of cycles for a hit @@ -424,74 +408,68 @@ class SetAssociativeCache < Cache # when set to "auto", libruby will attempt to find a realistic latency by running CACTI def initialize(obj_name, size, latency, assoc, replacement_policy) super(obj_name, size, latency) - @assoc = assoc - @replacement_policy = replacement_policy + self.assoc = assoc + self.replacement_policy = replacement_policy end def calculateLatency() - if @latency == "auto" + if self.latency == "auto" cacti_args = Array.new() - cacti_args << (@size) << RubySystem.block_size_bytes << @assoc + cacti_args << (self.size*1024) << RubySystem.block_size_bytes << self.assoc cacti_args << 1 << 0 << 0 << 0 << 1 cacti_args << RubySystem.tech_nm << RubySystem.block_size_bytes*8 cacti_args << 0 << 0 << 0 << 1 << 0 << 0 << 0 << 0 << 1 cacti_args << 360 << 0 << 0 << 0 << 0 << 1 << 1 << 1 << 1 << 0 << 0 cacti_args << 50 << 10 << 10 << 0 << 1 << 1 -# cacti_cmd = File.dirname(__FILE__) + "/cacti/cacti " + cacti_args.join(" ") - -# IO.popen(cacti_cmd) { |pipe| -# str1 = pipe.readline -# str2 = pipe.readline -# results = str2.split(", ") -# if results.size != 61 -# print "CACTI ERROR: CACTI produced unexpected output.\n" -# print "Are you using the version shipped with libruby?\n" -# raise Exception -# end -# latency_ns = results[5].to_f -# if (latency_ns == "1e+39") -# print "CACTI ERROR: CACTI was unable to realistically model the cache ",@obj_name,"\n" -# print "Either change the cache parameters or manually set the latency values\n" -# raise Exception -# end -# clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6)) -# latency_cycles = (latency_ns / clk_period_ns).ceil -# @latency = latency_cycles -# } - elsif @latency.is_a?(Float) + cacti_cmd = File.dirname(__FILE__) + "/cacti/cacti " + cacti_args.join(" ") + + IO.popen(cacti_cmd) { |pipe| + str1 = pipe.readline + str2 = pipe.readline + results = str2.split(", ") + if results.size != 61 + print "CACTI ERROR: CACTI produced unexpected output.\n" + print "Are you using the version shipped with libruby?\n" + raise Exception + end + latency_ns = results[5].to_f + if (latency_ns == "1e+39") + print "CACTI ERROR: CACTI was unable to realistically model the cache ",@obj_name,"\n" + print "Either change the cache parameters or manually set the latency values\n" + raise Exception + end + clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6)) + latency_cycles = (latency_ns / clk_period_ns).ceil + self.latency = latency_cycles + } + elsif self.latency.is_a?(Float) clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6)) - latency_cycles = (@latency / clk_period_ns).ceil - @latency = latency_cycles - elsif ! @latency.is_a?(Integer) + latency_cycles = (self.latency / clk_period_ns).ceil + self.latency = latency_cycles + elsif ! self.latency.is_a?(Integer) raise Exception end end - def argv() - args+" assoc "+@assoc.to_s+" replacement_policy "+@replacement_policy - end - def cppClassName() "SetAssociativeCache" end end class DirectoryMemory < LibRubyObject - attr :size_mb - attr_writer :controller + param :size_mb, Integer + param :controller, NetPort + param :version, Integer + @@total_size_mb = 0 def initialize(obj_name, size_mb) super(obj_name) - @size_mb = size_mb + self.size_mb = size_mb @@total_size_mb += size_mb end - def argv() - "version "+@controller.version.to_s+" size_mb "+@size_mb.to_s+" controller "+@controller.obj_name - end - def cppClassName() "DirectoryMemory" end @@ -501,43 +479,17 @@ class DirectoryMemory < LibRubyObject end end -#added by SS class MemoryControl < LibRubyObject - attr :name def initialize(obj_name) super(obj_name) - @name = obj_name end - def argv() - vec = super() - vec += " mem_bus_cycle_multiplier "+mem_bus_cycle_multiplier.to_s - vec += " banks_per_rank "+banks_per_rank.to_s - vec += " ranks_per_dimm "+ranks_per_dimm.to_s - vec += " dimms_per_channel "+dimms_per_channel.to_s - vec += " bank_bit_0 "+bank_bit_0.to_s - vec += " rank_bit_0 "+rank_bit_0.to_s - vec += " dimm_bit_0 "+dimm_bit_0.to_s - vec += " bank_queue_size "+bank_queue_size.to_s - vec += " bank_busy_time "+bank_busy_time.to_s - vec += " rank_rank_delay "+rank_rank_delay.to_s - vec += " read_write_delay "+read_write_delay.to_s - vec += " basic_bus_busy_time "+basic_bus_busy_time.to_s - vec += " mem_ctl_latency "+mem_ctl_latency.to_s - vec += " refresh_period "+refresh_period.to_s - vec += " tFaw "+tFaw.to_s - vec += " mem_random_arbitrate "+mem_random_arbitrate.to_s - vec += " mem_fixed_delay "+mem_fixed_delay.to_s - vec += " memory_controller_name "+@name - - end - - def cppClassName() "MemoryControl" end end + class Sequencer < IfacePort def cppClassName() @@ -564,17 +516,11 @@ end class DMASequencer < IfacePort + param :controller, NetPort + param :version, Integer + def initialize(obj_name) super(obj_name) - @params = { - :controller => nil, - :version => nil - } - end - - def controller=(controller) - @params[:controller] = controller.obj_name - @params[:version] = controller.version end def cppClassName() @@ -582,7 +528,7 @@ class DMASequencer < IfacePort end def bochsConnType() - return "dma"+@params[:version].to_s + return "dma"+self.version.to_s end end @@ -613,22 +559,8 @@ class Network < LibRubyObject param :topology, Topology def initialize(name, topo) super(name) - @params[:topology] = topo topo.network= self - end - - def argv() - vec = super() - - vec += " endpoint_bandwidth "+endpoint_bandwidth.to_s - vec += " adaptive_routing "+adaptive_routing.to_s - vec += " number_of_virtual_networks "+number_of_virtual_networks.to_s - vec += " fan_out_degree "+fan_out_degree.to_s - - vec += " buffer_size "+buffer_size.to_s - vec += " link_latency "+adaptive_routing.to_s - vec += " on_chip_latency "+on_chip_latency.to_s - vec += " control_msg_size "+control_msg_size.to_s + self.topology = topo end def printTopology() @@ -689,7 +621,6 @@ class CrossbarTopology < Topology end end -#added by SS class Tracer < LibRubyObject def initialize(obj_name) super(obj_name) @@ -712,20 +643,10 @@ class Profiler < LibRubyObject end -#added by SS class GarnetNetwork < Network def initialize(name, topo) super(name, topo) end - def argv() - vec = super() - vec += " flit_size "+flit_size.to_s - vec += " number_of_pipe_stages "+number_of_pipe_stages.to_s - vec += " vcs_per_class "+vcs_per_class.to_s - vec += " buffer_size "+buffer_size.to_s - vec += " using_network_testing "+using_network_testing.to_s - end - end class GarnetFixedPipeline < GarnetNetwork @@ -733,10 +654,6 @@ class GarnetFixedPipeline < GarnetNetwork super(name, net_ports) end - def argv() - super() - end - def cppClassName() "GarnetNetwork_d" end @@ -747,14 +664,9 @@ class GarnetFlexiblePipeline < GarnetNetwork super(name, net_ports) end - def argv() - super() - end - def cppClassName() "GarnetNetwork" end end -#added by SS require "defaults.rb" diff --git a/src/mem/ruby/config/config.hh b/src/mem/ruby/config/config.hh deleted file mode 100644 index ad91cd73d..000000000 --- a/src/mem/ruby/config/config.hh +++ /dev/null @@ -1,236 +0,0 @@ - -// FOR MOESI_CMP_token -//PARAM_BOOL( FilteringEnabled, false, false ); -//PARAM_BOOL( DistributedPersistentEnabled, true, false ); -//PARAM_BOOL( DynamicTimeoutEnabled, true, false ); -//PARAM( RetryThreshold, 1, false ); -//PARAM( FixedTimeoutLatency, 300, false ); - -//PARAM( TraceWarmupLength, 1000000, false ); - -//PARAM( callback_counter, 0, false ); -//PARAM( NUM_COMPLETIONS_BEFORE_PASS, 0, false ); - -//PARAM( tester_length, 0, false ); -//PARAM( synthetic_locks, 2048, false ); -//PARAM( think_time, 5, false ); -//PARAM( wait_time, 5, false ); -//PARAM( hold_time, 5, false ); -//PARAM( deterministic_addrs, 1, false ); -//PARAM_STRING( SpecifiedGenerator, "DetermInvGenerator", false ); - -// For debugging purposes, one can enable a trace of all the protocol -// state machine changes. Unfortunately, the code to generate the -// trace is protocol specific. To enable the code for some of the -// standard protocols, -// 1. change "PROTOCOL_DEBUG_TRACE = true" -// 2. enable debug in Makefile -// 3. use the "--start 1" command line parameter or -// "g_debug_ptr->setDebugTime(1)" to beging the following to set the -// debug begin time -// -// this use to be ruby/common/Global.hh - -//PARAM_BOOL( ProtocolDebugTrace, true, false ); -// a string for filtering debugging output (for all g_debug vars see Debug.hh) -//PARAM_STRING( DEBUG_FILTER_STRING, "", false ); -// filters debugging messages based on priority (low, med, high) -//PARAM_STRING( DEBUG_VERBOSITY_STRING, "", false ); -// filters debugging messages based on a ruby time -//PARAM_ULONG( DEBUG_START_TIME, 0, false ); -// sends debugging messages to a output filename -//PARAM_STRING( DEBUG_OUTPUT_FILENAME, "", false ); - -//PARAM_BOOL( ProfileHotLines, false, false ); - -// PROFILE_ALL_INSTRUCTIONS is used if you want Ruby to profile all instructions executed -// The following need to be true for this to work correctly: -// 1. Disable istc and dstc for this simulation run -// 2. Add the following line to the object "sim" in the checkpoint you run from: -// instruction_profile_line_size: 4 -// This is used to have simics report back all instruction requests - -// For more details on how to find out how to interpret the output physical instruction -// address, please read the document in the simics-howto directory -//PARAM_BOOL( ProfileAllInstructions, false, false ); - -// Set the following variable to true if you want a complete trace of -// PCs (physical address of program counters, with executing processor IDs) -// to be printed to stdout. Make sure to direct the simics output to a file. -// Otherwise, the run will take a really long time! -// A long run may write a file that can exceed the OS limit on file length -//PARAM_BOOL( PRINT_INSTRUCTION_TRACE, false, false ); -//PARAM( DEBUG_CYCLE, 0, false ); - -// Make the entire memory system perfect -//PARAM_BOOL( PERFECT_MEMORY_SYSTEM, false, false ); -//PARAM( PERFECT_MEMORY_SYSTEM_LATENCY, 0, false ); - -// ********************************************* -// SYSTEM PARAMETERS -// ********************************************* - -//PARAM( NumberOfChips, 1, false ); -//PARAM( NumberOfCores, 2, false ); -//PARAM_ARRAY( NumberOfCoresPerChip, int, m_NumberOfChips, 2, false); - -// ********************************************* -// CACHE PARAMETERS -// ********************************************* - -//PARAM( NumberOfCaches, m_NumberOfCores, false ); -//PARAM( NumberOfCacheLevels, 1, false ); -/* this returns the number of discrete CacheMemories per level (i.e. a split L1 counts for 2) */ -//PARAM_ARRAY( NumberOfCachesPerLevel, int, m_NumberOfCacheLevels, m_NumberOfCores, false ); // this is the number of discrete caches if the level is private - // or the number of banks if the level is shared -//PARAM( CacheIDFromParams, 1, true ); // returns a unique CacheID from the parameters (level, num, split_type) -//PARAM_ARRAY( CacheLatency, int, m_NumberOfCaches, 1, false ); // returns the latency for cache, indexed by CacheID -//PARAM_ARRAY( CacheSplitType, string, m_NumberOfCaches, "unified", false ); // returns "data", "instruction", or "unified", indexed by CacheID -//PARAM_ARRAY( CacheType, string, m_NumberOfCaches, "SetAssociative", false ); // returns the type of a cache, indexed by CacheID -//PARAM_ARRAY( CacheAssoc, int, m_NumberOfCaches, 4, false ); // returns the cache associativity, indexed by CacheID -//PARAM_ARRAY( NumberOfCacheSets, int, m_NumberOfCaches, 256, false ); // returns the number of cache sets, indexed by CacheID -//PARAM_ARRAY( NumberOfCacheSetBits, int, m_NumberOfCaches, log_int(256), false ); // returns the number of cache set bits, indexed by CacheID -//PARAM_ARRAY( CacheReplacementPolicy, string, m_NumberOfCaches, "PSEUDO_LRU", false ); // other option is "LRU" - -//PARAM( DataBlockBytes, 64, false ); -//PARAM( DataBlockBits, log_int(m_DataBlockBytes), false); - -// ******************************************** -// MEMORY PARAMETERS -// ******************************************** - -//PARAM_ARRAY( NumberOfControllersPerType, int, m_NumberOfCacheLevels+2, m_NumberOfCores, false); -//PARAM_ARRAY2D( NumberOfControllersPerTypePerChip, int, m_NumberOfCacheLevels+2, m_NumberOfChips, m_NumberOfCores, false); - -// ******************************************** -// DMA CONTROLLER PARAMETERS -// ******************************************** - -//PARAM( NumberOfDMA, 1, false ); -//PARAM_ARRAY( NumberOfDMAPerChip, int, m_NumberOfChips, 1, false); -//PARAM_ARRAY( ChipNumFromDMAVersion, int, m_NumberOfDMA, 0, false ); - -//PARAM_ULONG( MemorySizeBytes, 4294967296, false ); -//PARAM_ULONG( MemorySizeBits, 32, false); - -//PARAM( NUM_PROCESSORS, 0, false ); -//PARAM( NUM_L2_BANKS, 0, false ); -//PARAM( NUM_MEMORIES, 0, false ); -//PARAM( ProcsPerChip, 1, false ); - -// The following group of parameters are calculated. They must -// _always_ be left at zero. -//PARAM( NUM_CHIPS, 0, false ); -//PARAM( NUM_CHIP_BITS, 0, false ); -//PARAM( MEMORY_SIZE_BITS, 0, false ); -//PARAM( DATA_BLOCK_BITS, 0, false ); -//PARAM( PAGE_SIZE_BITS, 0, false ); -//PARAM( NUM_PROCESSORS_BITS, 0, false ); -//PARAM( PROCS_PER_CHIP_BITS, 0, false ); -//PARAM( NUM_L2_BANKS_BITS, 0, false ); -//PARAM( NUM_L2_BANKS_PER_CHIP_BITS, 0, false ); -//PARAM( NUM_L2_BANKS_PER_CHIP, 0, false ); -//PARAM( NUM_MEMORIES_BITS, 0, false ); -//PARAM( NUM_MEMORIES_PER_CHIP, 0, false ); -//PARAM( MEMORY_MODULE_BITS, 0, false ); -//PARAM_ULONG( MEMORY_MODULE_BLOCKS, 0, false ); - -// TIMING PARAMETERS -//PARAM( DIRECTORY_CACHE_LATENCY, 6, false ); - -//PARAM( NULL_LATENCY, 1, false ); -//PARAM( ISSUE_LATENCY, 2, false ); -//PARAM( CACHE_RESPONSE_LATENCY, 12, false ); -//PARAM( L2_RESPONSE_LATENCY, 6, false ); -//PARAM( L2_TAG_LATENCY, 6, false ); -//PARAM( L1_RESPONSE_LATENCY, 3, false ); - -//PARAM( MEMORY_RESPONSE_LATENCY_MINUS_2, 158, false ); -//PARAM( DirectoryLatency, 6, false ); - -//PARAM( NetworkLinkLatency, 1, false ); -//PARAM( COPY_HEAD_LATENCY, 4, false ); -//PARAM( OnChipLinkLatency, 1, false ); -//PARAM( RecycleLatency, 10, false ); -//PARAM( L2_RECYCLE_LATENCY, 5, false ); -//PARAM( TIMER_LATENCY, 10000, false ); -//PARAM( TBE_RESPONSE_LATENCY, 1, false ); -//PARAM_BOOL( PERIODIC_TIMER_WAKEUPS, true, false ); - -// constants used by CMP protocols -//PARAM( L1_REQUEST_LATENCY, 2, false ); -//PARAM( L2_REQUEST_LATENCY, 4, false ); -//PARAM_BOOL( SINGLE_ACCESS_L2_BANKS, true, false ); // hack to simulate multi-cycle L2 bank accesses - -// Ruby cycles between when a sequencer issues a miss it arrives at -// the L1 cache controller -//PARAM( SequencerToControllerLatency, 4, false ); - -// Number of transitions each controller state machines can complete per cycle -//PARAM( L1CacheTransitionsPerCycle, 32, false ); -//PARAM( L2CACHE_TRANSITIONS_PER_RUBY_CYCLE, 32, false ); -//PARAM( DirectoryTransitionsPerCycle, 32, false ); -//PARAM( DMATransitionsPerCycle, 1, false ); - -// Number of TBEs available for demand misses, prefetches, and replacements -//PARAM( NumberOfTBEs, 128, false ); -//PARAM( NumberOfL1TBEs, 32, false ); -//PARAM( NumberOfL2TBEs, 32, false ); - -// NOTE: Finite buffering allows us to simulate a wormhole routed network -// with idealized flow control. All message buffers within the network (i.e. -// the switch's input and output buffers) are set to the size specified below -// by the PROTOCOL_BUFFER_SIZE -//PARAM_BOOL( FiniteBuffering, false, false ); -//PARAM( FiniteBufferSize, 3, false ); // Zero is unbounded buffers -// Number of requests buffered between the sequencer and the L1 conroller -// This can be more accurately simulated in Opal, therefore it's set to an -// infinite number -// Only effects the simualtion when FINITE_BUFFERING is enabled -//PARAM( ProcessorBufferSize, 10, false ); -// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to -// Controllers. Controlls the number of request issued by the L2 HW Prefetcher -//PARAM( ProtocolBufferSize, 32, false ); - -// NETWORK PARAMETERS - -// Network Topology: See TopologyType in external.sm for valid values -//PARAM_STRING( NetworkTopology, "PT_TO_PT", false ); - -// Cache Design specifies file prefix for topology -//PARAM_STRING( CacheDesign, "NUCA", false ); - -//PARAM( EndpointBandwidth, 10000, false ); -//PARAM_BOOL( AdaptiveRouting, true, false ); -//PARAM( NumberOfVirtualNetworks, 6, false ); -//PARAM( FanOutDegree, 4, false ); -//PARAM_BOOL( PrintTopology, true, false ); - -// Princeton Network (Garnet) -//PARAM_BOOL( UsingGarnetNetwork, true, false ); -//PARAM_BOOL( UsingDetailNetwork, false, false ); -//PARAM_BOOL( UsingNetworkTesting, false, false ); -//PARAM( FlitSize, 16, false ); -//PARAM( NumberOfPipeStages, 4, false ); -//PARAM( VCSPerClass, 4, false ); -//PARAM( BufferSize, 4, false ); - -// MemoryControl: -//PARAM( MEM_BUS_CYCLE_MULTIPLIER, 10, false ); -//PARAM( BANKS_PER_RANK, 8, false ); -//PARAM( RANKS_PER_DIMM, 2, false ); -//PARAM( DIMMS_PER_CHANNEL, 2, false ); -//PARAM( BANK_BIT_0, 8, false ); -//PARAM( RANK_BIT_0, 11, false ); -//PARAM( DIMM_BIT_0, 12, false ); -//PARAM( BANK_QUEUE_SIZE, 12, false ); -//PARAM( BankBusyTime, 11, false ); -//PARAM( RANK_RANK_DELAY, 1, false ); -//PARAM( READ_WRITE_DELAY, 2, false ); -//PARAM( BASIC_BUS_BUSY_TIME, 2, false ); -//PARAM( MEM_CTL_LATENCY, 12, false ); -//PARAM( REFRESH_PERIOD, 1560, false ); -//PARAM( TFAW, 0, false ); -//PARAM( MEM_RANDOM_ARBITRATE, 0, false ); -//PARAM( MEM_FIXED_DELAY, 0, false ); - diff --git a/src/mem/ruby/config/defaults.rb b/src/mem/ruby/config/defaults.rb index f338f4e3f..224bf1eeb 100644 --- a/src/mem/ruby/config/defaults.rb +++ b/src/mem/ruby/config/defaults.rb @@ -1,7 +1,5 @@ #!/usr/bin/ruby - - class NetPort < LibRubyObject # number of transitions a SLICC state machine can transition per # cycle @@ -9,9 +7,8 @@ class NetPort < LibRubyObject # buffer_size limits the size of all other buffers connecting to # SLICC Controllers. When 0, infinite buffering is used. - default_param :buffer_size, Integer, 0 + default_param :buffer_size, Integer, 32 - # added by SS for TBE default_param :number_of_TBEs, Integer, 256 default_param :recycle_latency, Integer, 10 @@ -38,16 +35,36 @@ class Debug < LibRubyObject # 3. set start_time = 1 default_param :protocol_trace, Boolean, false - # a string for filtering debugging output (for all g_debug vars see Debug.h) + # a string for filtering debugging output. Valid options (also see Debug.cc): + # {"System", 's' }, + # {"Node", 'N' }, + # {"Queue", 'q' }, + # {"Event Queue", 'e' }, + # {"Network", 'n' }, + # {"Sequencer", 'S' }, + # {"Tester", 't' }, + # {"Generated", 'g' }, + # {"SLICC", 'l' }, + # {"Network Queues", 'Q' }, + # {"Time", 'T' }, + # {"Network Internals", 'i' }, + # {"Store Buffer", 'b' }, + # {"Cache", 'c' }, + # {"Predictor", 'p' }, + # {"Allocator", 'a' } + # + # e.g., "sq" will print system and queue debugging messages + # Set to "none" for no debugging output default_param :filter_string, String, "none" - # filters debugging messages based on priority (low, med, high) + # filters debugging messages based on priority (none, low, med, high) default_param :verbosity_string, String, "none" # filters debugging messages based on a ruby time default_param :start_time, Integer, 1 # sends debugging messages to a output filename + # set to "none" to print to stdout default_param :output_filename, String, "none" end @@ -65,23 +82,23 @@ class Topology < LibRubyObject # indicates whether the topology config will be displayed in the # stats file - default_param :print_config, Boolean, true + default_param :print_config, Boolean, false end class Network < LibRubyObject default_param :endpoint_bandwidth, Integer, 10000 default_param :adaptive_routing, Boolean, true - default_param :number_of_virtual_networks, Integer, 10 - default_param :fan_out_degree, Integer, 4 + default_param :number_of_virtual_networks, Integer, 5 + # default_param :fan_out_degree, Integer, 4 # default buffer size. Setting to 0 indicates infinite buffering - default_param :buffer_size, Integer, 0 + # default_param :buffer_size, Integer, 0 # local memory latency ?? NetworkLinkLatency default_param :link_latency, Integer, 1 # on chip latency - default_param :on_chip_latency, Integer, 1 + # default_param :on_chip_latency, Integer, 1 default_param :control_msg_size, Integer, 8 end @@ -94,20 +111,15 @@ class GarnetNetwork < Network default_param :using_network_testing, Boolean, false end - - -#added by SS class Tracer < LibRubyObject default_param :warmup_length, Integer, 1000000 end -#added by SS class Profiler < LibRubyObject default_param :hot_lines, Boolean, false default_param :all_instructions, Boolean, false end -#added by SS class MemoryControl < LibRubyObject default_param :mem_bus_cycle_multiplier, Integer, 10 @@ -125,7 +137,7 @@ class MemoryControl < LibRubyObject default_param :mem_ctl_latency, Integer, 12 default_param :refresh_period, Integer, 1560 default_param :tFaw, Integer, 0 - default_param :mem_random_arbitrate, Integer, 0 + default_param :mem_random_arbitrate, Integer, 11 default_param :mem_fixed_delay, Integer, 0 end @@ -163,49 +175,33 @@ class MOESI_CMP_directory_DirectoryController < DirectoryController end class MOESI_CMP_directory_DMAController < DMAController - default_param :request_latency, Integer, 6 - default_param :response_latency, Integer, 6 + default_param :request_latency, Integer, 14 + default_param :response_latency, Integer, 14 end -## MOESI_CMP_token protocol +class MESI_CMP_directory_L2CacheController < CacheController + default_param :request_latency, Integer, 2 + default_param :response_latency, Integer, 2 + default_param :to_L1_latency, Integer, 1 + +#if 0 then automatically calculated + default_param :lowest_bit, Integer, 0 + default_param :highest_bit, Integer, 0 +end -class MOESI_CMP_token_L1CacheController < L1CacheController +class MESI_CMP_directory_L1CacheController < L1CacheController default_param :l1_request_latency, Integer, 2 default_param :l1_response_latency, Integer, 2 - default_param :retry_threshold, Integer, 1 - default_param :fixed_timeout_latency, Integer, 300 - default_param :dynamic_timeout_enabled, Boolean, true + default_param :to_L2_latency, Integer, 1 end -class MOESI_CMP_token_L2CacheController < CacheController - default_param :l2_request_latency, Integer, 2 - default_param :l2_response_latency, Integer, 2 - default_param :filtering_enabled, Boolean, true -end -class MOESI_CMP_token_DirectoryController < DirectoryController +class MESI_CMP_directory_DirectoryController < DirectoryController + default_param :to_mem_ctrl_latency, Integer, 1 default_param :directory_latency, Integer, 6 - default_param :distributed_persistent, Boolean, true - default_param :fixed_timeout_latency, Integer, 300 -end - -class MOESI_CMP_token_DMAController < DMAController - default_param :request_latency, Integer, 6 - default_param :response_latency, Integer, 6 -end - -## MOESI_hammer protocol - -class MOESI_hammer_CacheController < L1CacheController - default_param :issue_latency, Integer, 2 - default_param :cache_response_latency, Integer, 12 -end - -class MOESI_hammer_DirectoryController < DirectoryController - default_param :memory_controller_latency, Integer, 12 end -class MOESI_hammer_DMAController < DMAController +class MESI_CMP_directory_DMAController < DMAController default_param :request_latency, Integer, 6 end @@ -219,8 +215,9 @@ class RubySystem # When set to true, the simulation will insert random delays on # message enqueue times. Note that even if this is set to false, # you can still have a non-deterministic simulation if random seed - # is set to "rand". This is because the Ruby swtiches use random - # link priority elevation + # is set to "rand". This is used mainly to debug protocols by forcing + # really strange interleavings and should not be used for + # performance runs. default_param :randomization, Boolean, false # tech_nm is the device size used to calculate latency and area @@ -246,31 +243,6 @@ class RubySystem default_param :profiler, Profiler, Profiler.new("profiler0") end -#added by SS - -class MESI_CMP_directory_L2CacheController < CacheController - default_param :l2_request_latency, Integer, 2 - default_param :l2_response_latency, Integer, 2 - default_param :to_L1_latency, Integer, 1 - -#if 0 then automatically calculated - default_param :lowest_bit, Integer, 0 - default_param :highest_bit, Integer, 0 -end - -class MESI_CMP_directory_L1CacheController < L1CacheController - default_param :l1_request_latency, Integer, 2 - default_param :l1_response_latency, Integer, 2 - default_param :to_L2_latency, Integer, 1 -end -class MESI_CMP_directory_DirectoryController < DirectoryController - default_param :to_mem_ctrl_latency, Integer, 1 - default_param :directory_latency, Integer, 6 -end - -class MESI_CMP_directory_DMAController < DMAController - default_param :request_latency, Integer, 6 -end diff --git a/src/mem/ruby/config/rubyconfig.defaults b/src/mem/ruby/config/rubyconfig.defaults deleted file mode 100644 index 936a2f091..000000000 --- a/src/mem/ruby/config/rubyconfig.defaults +++ /dev/null @@ -1,405 +0,0 @@ -// -// This file has been modified by Kevin Moore and Dan Nussbaum of the -// Scalable Systems Research Group at Sun Microsystems Laboratories -// (http://research.sun.com/scalable/) to support the Adaptive -// Transactional Memory Test Platform (ATMTP). For information about -// ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/. -// -// Please send email to atmtp-interest@sun.com with feedback, questions, or -// to request future announcements about ATMTP. -// -// ---------------------------------------------------------------------- -// -// File modification date: 2008-02-23 -// -// ---------------------------------------------------------------------- -// -// ATMTP is distributed as part of the GEMS software toolset and is -// available for use and modification under the terms of version 2 of the -// GNU General Public License. The GNU General Public License is contained -// in the file $GEMS/LICENSE. -// -// Multifacet GEMS is free software; you can redistribute it and/or modify -// it under the terms of version 2 of the GNU General Public License as -// published by the Free Software Foundation. -// -// Multifacet GEMS is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// General Public License for more details. -// -// You should have received a copy of the GNU General Public License along -// with the Multifacet GEMS; if not, write to the Free Software Foundation, -// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA -// -// ---------------------------------------------------------------------- -// - -g_RANDOM_SEED: 1 - -g_DEADLOCK_THRESHOLD: 500000 - -// determines how many Simics cycles advance for every Ruby cycle -// (does not apply when running Opal) -SIMICS_RUBY_MULTIPLIER: 4 - -// Ruby cycles between when a sequencer issues a request and it arrives at -// the L1 cache controller -// -// ** important ** this parameter determines the L2 hit latency when -// using the SMP protocols with a combined L1/L2 controller (-cache.sm) -// -SEQUENCER_TO_CONTROLLER_LATENCY: 4 - - -// When set to false, the L1 cache structures are probed for a hit in Sequencer.C -// If a request hits, it is *not* issued to the cache controller -// When set to true, all processor data requests issue to cache controller -// -// ** important ** this parameter must be set to false for proper L1/L2 hit timing -// for the SMP protocols with combined L1/L2 controllers (-cache.sm) -// -REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: false - - -// When running with Opal in SMT configurations, this indicates the number of threads per physical processor -g_NUM_SMT_THREADS: 1 - - -// Maximum number of requests (including SW prefetches) outstanding from -// the sequencer (Note: this also include items buffered in the store -// buffer) -g_SEQUENCER_OUTSTANDING_REQUESTS: 16 - - -PROTOCOL_DEBUG_TRACE: true -DEBUG_FILTER_STRING: none -DEBUG_VERBOSITY_STRING: none -DEBUG_START_TIME: 0 -DEBUG_OUTPUT_FILENAME: none - - -TRANSACTION_TRACE_ENABLED: false -USER_MODE_DATA_ONLY: false -PROFILE_HOT_LINES: false - -PROFILE_ALL_INSTRUCTIONS: false -PRINT_INSTRUCTION_TRACE: false -g_DEBUG_CYCLE: 0 -BLOCK_STC: false -PERFECT_MEMORY_SYSTEM: false -PERFECT_MEMORY_SYSTEM_LATENCY: 0 -DATA_BLOCK: false - - -// ********************************************* -// CACHE & MEMORY PARAMETERS -// ********************************************* - - -L1_CACHE_ASSOC: 4 -L1_CACHE_NUM_SETS_BITS: 8 -L2_CACHE_ASSOC: 4 -L2_CACHE_NUM_SETS_BITS: 16 - -// 32 bits = 4 GB address space -g_MEMORY_SIZE_BYTES: 1073741824 //4294967296 -g_DATA_BLOCK_BYTES: 64 -g_PAGE_SIZE_BYTES: 4096 -g_REPLACEMENT_POLICY: PSEDUO_LRU // currently, only other option is LRU - -g_PROCS_PER_CHIP: 1 - - -// set automatically -g_NUM_PROCESSORS: 0 -g_NUM_L2_BANKS: 0 -g_NUM_MEMORIES: 0 - -// The following group of parameters are calculated. They must -// _always_ be left at zero. -g_NUM_CHIPS: 0 -g_NUM_CHIP_BITS: 0 -g_MEMORY_SIZE_BITS: 0 -g_DATA_BLOCK_BITS: 0 -g_PAGE_SIZE_BITS: 0 -g_NUM_PROCESSORS_BITS: 0 -g_PROCS_PER_CHIP_BITS: 0 -g_NUM_L2_BANKS_BITS: 0 -g_NUM_L2_BANKS_PER_CHIP: 0 -g_NUM_L2_BANKS_PER_CHIP_BITS: 0 -g_NUM_MEMORIES_BITS: 0 -g_NUM_MEMORIES_PER_CHIP: 0 -g_MEMORY_MODULE_BITS: 0 -g_MEMORY_MODULE_BLOCKS: 0 - - -// For certain CMP protocols, determines whether the lowest bits of a block address -// are used to index to a L2 cache bank or into the sets of a -// single bank -// lowest highest -// true: g_DATA_BLOCK_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS | L2_CACHE_NUM_SETS_BITS -// false: g_DATA_BLOCK_BITS | L2_CACHE_NUM_SETS_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS -MAP_L2BANKS_TO_LOWEST_BITS: false - - - -// TIMING PARAMETERS -- many of these are protocol specific. See SLICC files -// to determine where they apply - -MEMORY_RESPONSE_LATENCY_MINUS_2: 158 // determines memory response latency -DIRECTORY_CACHE_LATENCY: 6 -NULL_LATENCY: 1 -ISSUE_LATENCY: 2 -CACHE_RESPONSE_LATENCY: 12 -L1_RESPONSE_LATENCY: 3 -L2_RESPONSE_LATENCY: 6 -L2_TAG_LATENCY: 6 -DIRECTORY_LATENCY: 80 -NETWORK_LINK_LATENCY: 1 -COPY_HEAD_LATENCY: 4 -ON_CHIP_LINK_LATENCY: 1 -RECYCLE_LATENCY: 10 -L2_RECYCLE_LATENCY: 5 -TIMER_LATENCY: 10000 -TBE_RESPONSE_LATENCY: 1 -PERIODIC_TIMER_WAKEUPS: true - - -// constants used by CMP protocols -// cache bank access times -L1_REQUEST_LATENCY: 2 -L2_REQUEST_LATENCY: 4 - - -// Number of transitions each controller state machines can complete per cycle -// i.e. the number of ports to each controller -// L1cache is the sum of the L1I and L1D cache ports -L1CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32 -// Note: if SINGLE_ACCESS_L2_BANKS is enabled, this will probably enforce a -// much greater constraint on the concurrency of a L2 cache bank -L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32 -DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 32 -DMA_TRANSITIONS_PER_RUBY_CYCLE: 1 - - -// Number of TBEs available for demand misses, ALL prefetches, and replacements -// used by one-level protocols -NUMBER_OF_TBES: 128 -// two-level protocols -NUMBER_OF_L1_TBES: 32 -NUMBER_OF_L2_TBES: 32 - -// ** INTERCONECT PARAMETERS ** -// -g_PRINT_TOPOLOGY: true -g_NETWORK_TOPOLOGY: HIERARCHICAL_SWITCH -g_CACHE_DESIGN: NUCA // specifies file prefix for FILE_SPECIFIED topology -FAN_OUT_DEGREE: 4 // for HIERARCHICAL SWITCH topology - -g_adaptive_routing: true -NUMBER_OF_VIRTUAL_NETWORKS: 6 - -// bandwidth unit is 1/1000 byte per cycle. the following parameter is multiplied by -// topology specific link weights -g_endpoint_bandwidth: 10000 - - -// ** finite buffering parameters -// -// note: Finite buffering allows us to simulate a realistic virtual cut-through -// routed network with idealized flow control. this feature is NOT heavily tested -FINITE_BUFFERING: false -// All message buffers within the network (i.e. the switch's input and -// output buffers) are set to the size specified below by the FINITE_BUFFER_SIZE -FINITE_BUFFER_SIZE: 3 -// g_SEQUENCER_OUTSTANDING_REQUESTS (above) controlls the number of demand requests -// issued by the sequencer. The PROCESSOR_BUFFER_SIZE controlls the -// number of requests in the mandatory queue -// Only effects the simualtion when FINITE_BUFFERING is enabled -PROCESSOR_BUFFER_SIZE: 10 -// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to -// Controllers. Controlls the number of request issued by the L2 HW Prefetcher -PROTOCOL_BUFFER_SIZE: 32 -// ** end finite buffering parameters - - -// (deprecated) -// Allows on a single accesses to a multi-cycle L2 bank. -// Ensures the cache array is only accessed once for every L2_REQUEST_LATENCY -// number of cycles. However the TBE table can be accessed in parallel. -SINGLE_ACCESS_L2_BANKS: true - - -// MOESI_CMP_token parameters (some might be deprecated) -g_FILTERING_ENABLED: false -g_DISTRIBUTED_PERSISTENT_ENABLED: true -g_RETRY_THRESHOLD: 1 -g_DYNAMIC_TIMEOUT_ENABLED: true -g_FIXED_TIMEOUT_LATENCY: 300 - - -// tester parameters (overridden by testerconfig.defaults) -// -// injects random message delays to excite protocol races -RANDOMIZATION: false -g_SYNTHETIC_DRIVER: false -g_DETERMINISTIC_DRIVER: false -g_trace_warmup_length: 1000000 -g_bash_bandwidth_adaptive_threshold: 0.75 - -g_tester_length: 0 -// # of synthetic locks == 16 * 128 -g_synthetic_locks: 2048 -g_deterministic_addrs: 1 -g_SpecifiedGenerator: DetermInvGenerator -g_callback_counter: 0 -g_NUM_COMPLETIONS_BEFORE_PASS: 0 -// parameters used by locking synthetic tester -g_think_time: 5 -g_hold_time: 5 -g_wait_time: 5 - -// Princeton Network (Garnet) -g_GARNET_NETWORK: true -g_DETAIL_NETWORK: false -g_NETWORK_TESTING: false -g_FLIT_SIZE: 16 -g_NUM_PIPE_STAGES: 4 -g_VCS_PER_CLASS: 4 -g_BUFFER_SIZE: 4 - -/////////////////////////////////////////////////////////////////////////////// -// -// MemoryControl: - -// Basic cycle time of the memory controller. This defines the period which is -// used as the memory channel clock period, the address bus bit time, and the -// memory controller cycle time. -// Assuming a 200 MHz memory channel (DDR-400, which has 400 bits/sec data), -// and a 2 GHz Ruby clock: -MEM_BUS_CYCLE_MULTIPLIER: 10 - -// How many internal banks in each DRAM chip: -BANKS_PER_RANK: 8 - -// How many sets of DRAM chips per DIMM. -RANKS_PER_DIMM: 2 - -// How many DIMMs per channel. (Currently the only thing that -// matters is the number of ranks per channel, i.e. the product -// of this parameter and RANKS_PER_DIMM. But if and when this is -// expanded to do FB-DIMMs, the distinction between the two -// will matter.) -DIMMS_PER_CHANNEL: 2 - -// Which bits to use to find the bank, rank, and DIMM numbers. -// You could choose to have the bank bits, rank bits, and DIMM bits -// in any order; here they are in that order. -// For these defaults, we assume this format for addresses: -// Offset within line: [5:0] -// Memory controller #: [7:6] -// Bank: [10:8] -// Rank: [11] -// DIMM: [12] -// Row addr / Col addr: [top:13] -// If you get these bits wrong, then some banks won't see any -// requests; you need to check for this in the .stats output. -BANK_BIT_0: 8 -RANK_BIT_0: 11 -DIMM_BIT_0: 12 - -// Number of entries max in each bank queues; set to whatever you want. -// If it is too small, you will see in the .stats file a lot of delay -// time spent in the common input queue. -BANK_QUEUE_SIZE: 12 - -// Bank cycle time (tRC) measured in memory cycles: -BANK_BUSY_TIME: 11 - -// This is how many memory address cycles to delay between reads to -// different ranks of DRAMs to allow for clock skew: -RANK_RANK_DELAY: 1 - -// This is how many memory address cycles to delay between a read -// and a write. This is based on two things: (1) the data bus is -// used one cycle earlier in the operation; (2) a round-trip wire -// delay from the controller to the DIMM that did the reading. -READ_WRITE_DELAY: 2 - -// Basic address and data bus occupancy. If you are assuming a -// 16-byte-wide data bus (pairs of DIMMs side-by-side), then -// the data bus occupancy matches the address bus occupancy at -// two cycles. But if the channel is only 8 bytes wide, you -// need to increase this bus occupancy time to 4 cycles. -BASIC_BUS_BUSY_TIME: 2 - -// Latency to returning read request or writeback acknowledgement. -// Measured in memory address cycles. -// This equals tRCD + CL + AL + (four bit times) -// + (round trip on channel) -// + (memory control internal delays) -// It's going to be an approximation, so pick what you like. -// Note: The fact that latency is a constant, and does not depend on two -// low-order address bits, implies that our memory controller either: -// (a) tells the DRAM to read the critical word first, and sends the -// critical word first back to the CPU, or (b) waits until it has -// seen all four bit times on the data wires before sending anything -// back. Either is plausible. If (a), remove the "four bit times" -// term from the calculation above. -MEM_CTL_LATENCY: 12 - -// refresh_period is the number of memory cycles between refresh -// of row x in bank n and refresh of row x+1 in bank n. For DDR-400, -// this is typically 7.8 usec for commercial systems; after 8192 such -// refreshes, this will have refreshed the whole chip in 64 msec. If -// we have a 5 nsec memory clock, 7800 / 5 = 1560 cycles. The memory -// controller will divide this by the total number of banks, and kick -// off a refresh to *somebody* every time that amount is counted -// down to zero. (There will be some rounding error there, but it -// should have minimal effect.) -REFRESH_PERIOD: 1560 - -// tFAW is a DRAM chip parameter which restricts the number of -// activates that can be done within a certain window of time. -// The window is specified here in terms of number of memory -// controller cycles. At most four activates may be done during -// any such sliding window. If this number is set to be no more -// than 4 * BASIC_BUS_BUSY_TIME, it will have no effect. -// It is typical in real systems for tFAW to have no effect, but -// it may be useful in throttling power. Set to zero to ignore. -TFAW: 0 - -// By default, the memory controller uses round-robin to arbitrate -// between ready bank queues for use of the address bus. If you -// wish to add randomness to the system, set this parameter to -// one instead, and it will restart the round-robin pointer at a -// random bank number each cycle. If you want additional -// nondeterminism, set the parameter to some integer n >= 2, and -// it will in addition add a n% chance each cycle that a ready bank -// will be delayed an additional cycle. Note that if you are -// in MEM_FIXED_DELAY mode (see below), MEM_RANDOM_ARBITRATE=1 will -// have no effect, but MEM_RANDOM_ARBITRATE=2 or more will. -MEM_RANDOM_ARBITRATE: 0 - -// The following parameter, if nonzero, will disable the memory -// controller and instead give every request a fixed latency. The -// nonzero value specified here is measured in memory cycles and is -// just added to MEM_CTL_LATENCY. It will also show up in the stats -// file as a contributor to memory_delays_stalled_at_head_of_bank_queue. -MEM_FIXED_DELAY: 0 - -// If instead of DDR-400, you wanted DDR-800, the channel gets faster -// but the basic operation of the DRAM core is unchanged. -// Busy times appear to double just because they are measured -// in smaller clock cycles. The performance advantage comes because -// the bus busy times don't actually quite double. -// You would use something like these values: -// -// MEM_BUS_CYCLE_MULTIPLIER: 5 -// BANK_BUSY_TIME: 22 -// RANK_RANK_DELAY: 2 -// READ_WRITE_DELAY: 3 -// BASIC_BUS_BUSY_TIME: 3 -// MEM_CTL_LATENCY: 20 -// REFRESH_PERIOD: 3120 diff --git a/src/mem/ruby/config/tester.defaults b/src/mem/ruby/config/tester.defaults deleted file mode 100644 index b30d1ba99..000000000 --- a/src/mem/ruby/config/tester.defaults +++ /dev/null @@ -1,50 +0,0 @@ - -// -// This file contains tester specific changes to the rubyconfig.defaults -// parameter values. -// -// Please: - Add new variables only to rubyconfig.defaults file. -// - Change them here only when necessary. - -g_SIMICS: false -DATA_BLOCK: true -RANDOMIZATION: true -g_SYNTHETIC_DRIVER: false -g_DETERMINISTIC_DRIVER: true -g_DEADLOCK_THRESHOLD: 500000 -g_SpecifiedGenerator: DetermGETXGenerator - -PROTOCOL_DEBUG_TRACE: true - -// -// Generic cache parameters -// - -// Cache sizes are smaller for the random tester to increase the amount -// of false sharing. -L1_CACHE_ASSOC: 2 -L1_CACHE_NUM_SETS_BITS: 2 -L2_CACHE_ASSOC: 2 -L2_CACHE_NUM_SETS_BITS: 5 - -g_MEMORY_SIZE_BYTES: 1048576 - -//g_NETWORK_TOPOLOGY: FILE_SPECIFIED -RECYCLE_LATENCY: 1 -//NUMBER_OF_VIRTUAL_NETWORKS: 5 -//g_NUM_MEMORIES: 16 -L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 1000 -DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 1000 -//g_PROCS_PER_CHIP: 2 -//g_NUM_L2_BANKS: 16 -//g_endpoint_bandwidth: 10000 -//g_NUM_PROCESSORS: 16 -//g_NUM_SMT_THREADS: 1 -//g_GARNET_NETWORK: true -//g_DETAIL_NETWORK: true -//g_NETWORK_TESTING: false -//g_FLIT_SIZE: 32 -//g_NUM_PIPE_STAGES: 5 -//g_VCS_PER_CLASS: 2 -//g_BUFFER_SIZE: 4 - diff --git a/src/mem/ruby/libruby.cc b/src/mem/ruby/libruby.cc index b9a72d071..57dd13c87 100644 --- a/src/mem/ruby/libruby.cc +++ b/src/mem/ruby/libruby.cc @@ -58,11 +58,8 @@ RubyRequestType string_to_RubyRequestType(std::string str) ostream& operator<<(ostream& out, const RubyRequestType& obj) { - cerr << "in op" << endl; out << RubyRequestType_to_string(obj); - cerr << "flushing" << endl; out << flush; - cerr << "done" << endl; return out; } diff --git a/src/mem/ruby/libruby.hh b/src/mem/ruby/libruby.hh index 29aac232a..4c50611c1 100644 --- a/src/mem/ruby/libruby.hh +++ b/src/mem/ruby/libruby.hh @@ -34,7 +34,7 @@ struct RubyRequest { unsigned proc_id; RubyRequest() {} - RubyRequest(uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, unsigned _proc_id = 0) + RubyRequest(uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, unsigned _proc_id = 100) : paddr(_paddr), data(_data), len(_len), pc(_pc), type(_type), access_mode(_access_mode), proc_id(_proc_id) {} }; @@ -71,6 +71,12 @@ RubyPortHandle libruby_get_port(const char* name, void (*hit_callback)(int64_t a RubyPortHandle libruby_get_port_by_name(const char* name); +/** + * libruby_issue_request error return codes + */ +#define LIBRUBY_BUFFER_FULL -2 +#define LIBRUBY_ALIASED_REQUEST -3 + /** * issue_request returns a unique access_id to identify the ruby * transaction. This access_id is later returned to the caller via diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc index 02fc8db2a..467e1bf87 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.cc +++ b/src/mem/ruby/network/simple/PerfectSwitch.cc @@ -184,7 +184,7 @@ void PerfectSwitch::wakeup() assert(m_link_order.size() == m_routing_table.size()); assert(m_link_order.size() == m_out.size()); -//changed by SS + if (m_network_ptr->getAdaptiveRouting()) { if (m_network_ptr->isVNetOrdered(vnet)) { // Don't adaptively route diff --git a/src/mem/ruby/network/simple/Topology.cc b/src/mem/ruby/network/simple/Topology.cc index dedf79d58..563a1b01c 100644 --- a/src/mem/ruby/network/simple/Topology.cc +++ b/src/mem/ruby/network/simple/Topology.cc @@ -79,7 +79,6 @@ void Topology::init(const vector<string> & argv) m_connections = argv[i+1]; else if (argv[i] == "print_config") { m_print_config = string_to_bool(argv[i+1]); - cerr << "print config: " << m_print_config << endl; } } assert(m_network_ptr != NULL); diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index 7da3d317a..c7062262a 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -21,9 +21,8 @@ public: virtual const string toString() const = 0; // returns text version of controller type virtual const string getName() const = 0; // return instance name virtual const MachineType getMachineType() const = 0; - virtual void set_atomic(Address addr) = 0; - virtual void started_writes() = 0; - virtual void clear_atomic() = 0; + virtual void blockOnQueue(Address, MessageBuffer*) = 0; + virtual void unblock(Address) = 0; virtual void print(ostream & out) const = 0; virtual void printStats(ostream & out) const = 0; diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh index 222ff86f8..69424c414 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh @@ -94,6 +94,17 @@ MachineID map_Address_to_DMA(const Address & addr) return dma; } +inline +NetDest broadcast(MachineType type) +{ + NetDest dest; + for (int i=0; i<MachineType_base_count(type); i++) { + MachineID mach = {type, i}; + dest.add(mach); + } + return dest; +} + inline MachineID mapAddressToRange(const Address & addr, MachineType type, int low_bit, int num_bits) { diff --git a/src/mem/ruby/system/CacheMemory.cc b/src/mem/ruby/system/CacheMemory.cc index 630b94542..cf3e094ad 100644 --- a/src/mem/ruby/system/CacheMemory.cc +++ b/src/mem/ruby/system/CacheMemory.cc @@ -83,10 +83,8 @@ void CacheMemory::init(const vector<string> & argv) } } - assert(cache_size != -1); - - m_cache_num_sets = (cache_size / m_cache_assoc) / RubySystem::getBlockSizeBytes(); - assert(m_cache_num_sets > 1); + int num_lines = cache_size/RubySystem::getBlockSizeBytes(); + m_cache_num_sets = num_lines / m_cache_assoc; m_cache_num_set_bits = log_int(m_cache_num_sets); assert(m_cache_num_set_bits > 0); @@ -122,7 +120,7 @@ CacheMemory::~CacheMemory() } int -CacheMemory::numberOfLastLevelCaches() +CacheMemory::numberOfLastLevelCaches() { return m_num_last_level_caches; } @@ -165,13 +163,10 @@ int CacheMemory::findTagInSet(Index cacheSet, const Address& tag) const { assert(tag == line_address(tag)); // search the set for the tags - for (int i=0; i < m_cache_assoc; i++) { - if ((m_cache[cacheSet][i] != NULL) && - (m_cache[cacheSet][i]->m_Address == tag) && - (m_cache[cacheSet][i]->m_Permission != AccessPermission_NotPresent)) { - return i; - } - } + m5::hash_map<Address, int>::const_iterator it = m_tag_index.find(tag); + if (it != m_tag_index.end()) + if (m_cache[cacheSet][it->second]->m_Permission != AccessPermission_NotPresent) + return it->second; return -1; // Not found } @@ -181,10 +176,9 @@ int CacheMemory::findTagInSetIgnorePermissions(Index cacheSet, const Address& ta { assert(tag == line_address(tag)); // search the set for the tags - for (int i=0; i < m_cache_assoc; i++) { - if (m_cache[cacheSet][i] != NULL && m_cache[cacheSet][i]->m_Address == tag) - return i; - } + m5::hash_map<Address, int>::const_iterator it = m_tag_index.find(tag); + if (it != m_tag_index.end()) + return it->second; return -1; // Not found } @@ -291,6 +285,7 @@ void CacheMemory::allocate(const Address& address, AbstractCacheEntry* entry) m_cache[cacheSet][i]->m_Address = address; m_cache[cacheSet][i]->m_Permission = AccessPermission_Invalid; m_locked[cacheSet][i] = -1; + m_tag_index[address] = i; m_replacementPolicy_ptr->touch(cacheSet, i, g_eventQueue_ptr->getTime()); @@ -311,6 +306,7 @@ void CacheMemory::deallocate(const Address& address) delete m_cache[cacheSet][location]; m_cache[cacheSet][location] = NULL; m_locked[cacheSet][location] = -1; + m_tag_index.erase(address); } } diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh index 856b7bcac..8b84f33ec 100644 --- a/src/mem/ruby/system/CacheMemory.hh +++ b/src/mem/ruby/system/CacheMemory.hh @@ -54,6 +54,7 @@ #include "mem/ruby/slicc_interface/AbstractController.hh" #include "mem/ruby/profiler/CacheProfiler.hh" #include "mem/protocol/CacheMsg.hh" +#include "base/hashmap.hh" #include <vector> class CacheMemory { @@ -70,8 +71,6 @@ public: // static CacheMemory* createCache(int level, int num, char split_type, AbstractCacheEntry* (*entry_factory)()); // static CacheMemory* getCache(int cache_id); - static int numberOfLastLevelCaches(); - // Public Methods void printConfig(ostream& out); @@ -106,6 +105,8 @@ public: AccessPermission getPermission(const Address& address) const; void changePermission(const Address& address, AccessPermission new_perm); + static int numberOfLastLevelCaches(); + int getLatency() const { return m_latency; } // Hook for checkpointing the contents of the cache @@ -158,6 +159,7 @@ private: // The first index is the # of cache lines. // The second index is the the amount associativity. + m5::hash_map<Address, int> m_tag_index; Vector<Vector<AbstractCacheEntry*> > m_cache; Vector<Vector<int> > m_locked; @@ -169,9 +171,11 @@ private: int m_cache_num_set_bits; int m_cache_assoc; + static Vector< CacheMemory* > m_all_caches; + static int m_num_last_level_caches; static MachineType m_last_level_machine_type; - static Vector< CacheMemory* > m_all_caches; + }; #endif //CACHEMEMORY_H diff --git a/src/mem/ruby/system/DMASequencer.hh b/src/mem/ruby/system/DMASequencer.hh index 1f60b95ec..77c0a2258 100644 --- a/src/mem/ruby/system/DMASequencer.hh +++ b/src/mem/ruby/system/DMASequencer.hh @@ -25,6 +25,7 @@ public: void init(const vector<string> & argv); /* external interface */ int64_t makeRequest(const RubyRequest & request); + bool isReady(const RubyRequest & request, bool dont_set = false) { assert(0); return false;}; // void issueRequest(uint64_t paddr, uint8* data, int len, bool rw); bool busy() { return m_is_busy;} diff --git a/src/mem/ruby/system/DirectoryMemory.cc b/src/mem/ruby/system/DirectoryMemory.cc index e230059ad..9b2a3873c 100644 --- a/src/mem/ruby/system/DirectoryMemory.cc +++ b/src/mem/ruby/system/DirectoryMemory.cc @@ -44,7 +44,7 @@ int DirectoryMemory::m_num_directories = 0; int DirectoryMemory::m_num_directories_bits = 0; -int DirectoryMemory::m_total_size_bytes = 0; +uint64_t DirectoryMemory::m_total_size_bytes = 0; DirectoryMemory::DirectoryMemory(const string & name) : m_name(name) diff --git a/src/mem/ruby/system/DirectoryMemory.hh b/src/mem/ruby/system/DirectoryMemory.hh index 39de679ed..09211fd83 100644 --- a/src/mem/ruby/system/DirectoryMemory.hh +++ b/src/mem/ruby/system/DirectoryMemory.hh @@ -91,7 +91,7 @@ private: static int m_num_directories; static int m_num_directories_bits; - static int m_total_size_bytes; + static uint64_t m_total_size_bytes; MemoryVector* m_ram; }; diff --git a/src/mem/ruby/system/MemoryVector.hh b/src/mem/ruby/system/MemoryVector.hh index c5f3cea7f..775244840 100644 --- a/src/mem/ruby/system/MemoryVector.hh +++ b/src/mem/ruby/system/MemoryVector.hh @@ -21,61 +21,105 @@ class MemoryVector { void write(const Address & paddr, uint8* data, int len); uint8* read(const Address & paddr, uint8* data, int len); - private: - uint8* getBlockPtr(const Address & paddr); +private: + uint8* getBlockPtr(const PhysAddress & addr); uint32 m_size; - uint8* m_vec; + uint8** m_pages; + uint32 m_num_pages; + const uint32 m_page_offset_mask; }; inline MemoryVector::MemoryVector() + : m_page_offset_mask(4095) { m_size = 0; - m_vec = NULL; + m_num_pages = 0; + m_pages = NULL; } inline MemoryVector::MemoryVector(uint32 size) + : m_page_offset_mask(4095) { - m_size = size; - m_vec = new uint8[size]; + setSize(size); } inline MemoryVector::~MemoryVector() { - delete [] m_vec; + for (int i=0; i<m_num_pages; i++) { + if (m_pages[i] != 0) { + delete [] m_pages[i]; + } + } + delete [] m_pages; } inline void MemoryVector::setSize(uint32 size) { + if (m_pages != NULL){ + for (int i=0; i<m_num_pages; i++) { + if (m_pages[i] != 0) { + delete [] m_pages[i]; + } + } + delete [] m_pages; + } m_size = size; - if (m_vec != NULL) - delete [] m_vec; - m_vec = new uint8[size]; + assert(size%4096 == 0); + m_num_pages = size >> 12; + m_pages = new uint8*[m_num_pages]; + memset(m_pages, 0, m_num_pages * sizeof(uint8*)); } inline void MemoryVector::write(const Address & paddr, uint8* data, int len) { assert(paddr.getAddress() + len <= m_size); - memcpy(m_vec + paddr.getAddress(), data, len); + uint32 page_num = paddr.getAddress() >> 12; + if (m_pages[page_num] == 0) { + bool all_zeros = true; + for (int i=0;i<len;i++) { + if (data[i] != 0) { + all_zeros = false; + break; + } + } + if (all_zeros) return; + m_pages[page_num] = new uint8[4096]; + memset(m_pages[page_num], 0, 4096); + uint32 offset = paddr.getAddress() & m_page_offset_mask; + memcpy(&m_pages[page_num][offset], data, len); + } else { + memcpy(&m_pages[page_num][paddr.getAddress()&m_page_offset_mask], data, len); + } } inline uint8* MemoryVector::read(const Address & paddr, uint8* data, int len) { assert(paddr.getAddress() + len <= m_size); - memcpy(data, m_vec + paddr.getAddress(), len); + uint32 page_num = paddr.getAddress() >> 12; + if (m_pages[page_num] == 0) { + memset(data, 0, len); + } else { + memcpy(data, &m_pages[page_num][paddr.getAddress()&m_page_offset_mask], len); + } return data; } inline -uint8* MemoryVector::getBlockPtr(const Address & paddr) +uint8* MemoryVector::getBlockPtr(const PhysAddress & paddr) { - return m_vec + paddr.getAddress(); + uint32 page_num = paddr.getAddress() >> 12; + if (m_pages[page_num] == 0) { + m_pages[page_num] = new uint8[4096]; + memset(m_pages[page_num], 0, 4096); + } + return &m_pages[page_num][paddr.getAddress()&m_page_offset_mask]; } #endif // MEMORYVECTOR_H diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index c693e0f37..b4716c346 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -27,6 +27,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "mem/ruby/libruby.hh" #include "mem/ruby/common/Global.hh" #include "mem/ruby/system/Sequencer.hh" #include "mem/ruby/system/System.hh" @@ -44,14 +45,14 @@ //Sequencer::Sequencer(int core_id, MessageBuffer* mandatory_q) #define LLSC_FAIL -2 -ostream& operator<<(std::ostream& out, const SequencerRequest& obj) { - out << obj.ruby_request << flush; - return out; -} - +long int already = 0; Sequencer::Sequencer(const string & name) :RubyPort(name) { + m_store_waiting_on_load_cycles = 0; + m_store_waiting_on_store_cycles = 0; + m_load_waiting_on_store_cycles = 0; + m_load_waiting_on_load_cycles = 0; } void Sequencer::init(const vector<string> & argv) @@ -65,8 +66,6 @@ void Sequencer::init(const vector<string> & argv) m_instCache_ptr = NULL; m_dataCache_ptr = NULL; m_controller = NULL; - m_servicing_atomic = -1; - m_atomics_counter = 0; for (size_t i=0; i<argv.size(); i+=2) { if ( argv[i] == "controller") { m_controller = RubySystem::getController(argv[i+1]); // args[i] = "L1Cache" @@ -110,8 +109,9 @@ void Sequencer::wakeup() { SequencerRequest* request = m_readRequestTable.lookup(keys[i]); if (current_time - request->issue_time >= m_deadlock_threshold) { WARN_MSG("Possible Deadlock detected"); - WARN_EXPR(request->ruby_request); + WARN_EXPR(request); WARN_EXPR(m_version); + WARN_EXPR(request->ruby_request.paddr); WARN_EXPR(keys.size()); WARN_EXPR(current_time); WARN_EXPR(request->issue_time); @@ -125,7 +125,7 @@ void Sequencer::wakeup() { SequencerRequest* request = m_writeRequestTable.lookup(keys[i]); if (current_time - request->issue_time >= m_deadlock_threshold) { WARN_MSG("Possible Deadlock detected"); - WARN_EXPR(request->ruby_request); + WARN_EXPR(request); WARN_EXPR(m_version); WARN_EXPR(current_time); WARN_EXPR(request->issue_time); @@ -145,6 +145,14 @@ void Sequencer::wakeup() { } } +void Sequencer::printStats(ostream & out) const { + out << "Sequencer: " << m_name << endl; + out << " store_waiting_on_load_cycles: " << m_store_waiting_on_load_cycles << endl; + out << " store_waiting_on_store_cycles: " << m_store_waiting_on_store_cycles << endl; + out << " load_waiting_on_load_cycles: " << m_load_waiting_on_load_cycles << endl; + out << " load_waiting_on_store_cycles: " << m_load_waiting_on_store_cycles << endl; +} + void Sequencer::printProgress(ostream& out) const{ /* int total_demand = 0; @@ -267,6 +275,7 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) { assert(m_writeRequestTable.exist(line_address(address))); SequencerRequest* request = m_writeRequestTable.lookup(address); + removeRequest(request); assert((request->ruby_request.type == RubyRequestType_ST) || @@ -274,15 +283,15 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) { (request->ruby_request.type == RubyRequestType_RMW_Write) || (request->ruby_request.type == RubyRequestType_Locked_Read) || (request->ruby_request.type == RubyRequestType_Locked_Write)); - // POLINA: the assumption is that atomics are only on data cache and not instruction cache + if (request->ruby_request.type == RubyRequestType_Locked_Read) { m_dataCache_ptr->setLocked(address, m_version); } else if (request->ruby_request.type == RubyRequestType_RMW_Read) { - m_controller->set_atomic(address); + m_controller->blockOnQueue(address, m_mandatory_q_ptr); } else if (request->ruby_request.type == RubyRequestType_RMW_Write) { - m_controller->clear_atomic(); + m_controller->unblock(address); } hitCallback(request, data); @@ -354,47 +363,33 @@ void Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data) { } // Returns true if the sequencer already has a load or store outstanding -bool Sequencer::isReady(const RubyRequest& request) { - // POLINA: check if we are currently flushing the write buffer, if so Ruby is returned as not ready - // to simulate stalling of the front-end - // Do we stall all the sequencers? If it is atomic instruction - yes! - if (m_outstanding_count >= m_max_outstanding_requests) { - return false; - } - - if( m_writeRequestTable.exist(line_address(Address(request.paddr))) || - m_readRequestTable.exist(line_address(Address(request.paddr))) ){ - //cout << "OUTSTANDING REQUEST EXISTS " << p << " VER " << m_version << endl; - //printProgress(cout); - return false; - } - - if (m_servicing_atomic != -1 && m_servicing_atomic != (int)request.proc_id) { - assert(m_atomics_counter > 0); - return false; - } - else { - if (request.type == RubyRequestType_RMW_Read) { - if (m_servicing_atomic == -1) { - assert(m_atomics_counter == 0); - m_servicing_atomic = (int)request.proc_id; - } - else { - assert(m_servicing_atomic == (int)request.proc_id); - } - m_atomics_counter++; +int Sequencer::isReady(const RubyRequest& request) { + bool is_outstanding_store = m_writeRequestTable.exist(line_address(Address(request.paddr))); + bool is_outstanding_load = m_readRequestTable.exist(line_address(Address(request.paddr))); + if ( is_outstanding_store ) { + if ((request.type == RubyRequestType_LD) || + (request.type == RubyRequestType_IFETCH) || + (request.type == RubyRequestType_RMW_Read)) { + m_store_waiting_on_load_cycles++; + } else { + m_store_waiting_on_store_cycles++; } - else if (request.type == RubyRequestType_RMW_Write) { - assert(m_servicing_atomic == (int)request.proc_id); - assert(m_atomics_counter > 0); - m_atomics_counter--; - if (m_atomics_counter == 0) { - m_servicing_atomic = -1; - } + return LIBRUBY_ALIASED_REQUEST; + } else if ( is_outstanding_load ) { + if ((request.type == RubyRequestType_ST) || + (request.type == RubyRequestType_RMW_Write) ) { + m_load_waiting_on_store_cycles++; + } else { + m_load_waiting_on_load_cycles++; } + return LIBRUBY_ALIASED_REQUEST; } - return true; + if (m_outstanding_count >= m_max_outstanding_requests) { + return LIBRUBY_BUFFER_FULL; + } + + return 1; } bool Sequencer::empty() const { @@ -405,11 +400,12 @@ bool Sequencer::empty() const { int64_t Sequencer::makeRequest(const RubyRequest & request) { assert(Address(request.paddr).getOffset() + request.len <= RubySystem::getBlockSizeBytes()); - if (isReady(request)) { + int ready = isReady(request); + if (ready > 0) { int64_t id = makeUniqueRequestID(); SequencerRequest *srequest = new SequencerRequest(request, id, g_eventQueue_ptr->getTime()); bool found = insertRequest(srequest); - if (!found) + if (!found) { if (request.type == RubyRequestType_Locked_Write) { // NOTE: it is OK to check the locked flag here as the mandatory queue will be checked first // ensuring that nothing comes between checking the flag and servicing the store @@ -420,16 +416,17 @@ int64_t Sequencer::makeRequest(const RubyRequest & request) m_dataCache_ptr->clearLocked(line_address(Address(request.paddr))); } } - if (request.type == RubyRequestType_RMW_Write) { - m_controller->started_writes(); - } issueRequest(request); - // TODO: issue hardware prefetches here - return id; - } - else { - return -1; + // TODO: issue hardware prefetches here + return id; + } + else { + assert(0); + return 0; + } + } else { + return ready; } } @@ -448,10 +445,8 @@ void Sequencer::issueRequest(const RubyRequest& request) { ctype = CacheRequestType_ST; break; case RubyRequestType_Locked_Read: - ctype = CacheRequestType_ST; - break; case RubyRequestType_Locked_Write: - ctype = CacheRequestType_ST; + ctype = CacheRequestType_ATOMIC; break; case RubyRequestType_RMW_Read: ctype = CacheRequestType_ATOMIC; diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index cf12c2a0b..1621bbbdc 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -86,10 +86,11 @@ public: // called by Tester or Simics int64_t makeRequest(const RubyRequest & request); - bool isReady(const RubyRequest& request); + int isReady(const RubyRequest& request); bool empty() const; void print(ostream& out) const; + void printStats(ostream & out) const; void checkCoherence(const Address& address); // bool getRubyMemoryValue(const Address& addr, char* value, unsigned int size_in_bytes); @@ -127,8 +128,11 @@ private: // Global outstanding request count, across all request tables int m_outstanding_count; bool m_deadlock_check_scheduled; - int m_servicing_atomic; - int m_atomics_counter; + + int m_store_waiting_on_load_cycles; + int m_store_waiting_on_store_cycles; + int m_load_waiting_on_store_cycles; + int m_load_waiting_on_load_cycles; }; // Output operator declaration diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc index ad67cdc80..4ce919618 100644 --- a/src/mem/ruby/system/System.cc +++ b/src/mem/ruby/system/System.cc @@ -335,6 +335,10 @@ void RubySystem::printStats(ostream& out) m_profiler_ptr->printStats(out); m_network_ptr->printStats(out); + for (map<string, Sequencer*>::const_iterator it = m_sequencers.begin(); + it != m_sequencers.end(); it++) { + (*it).second->printStats(out); + } for (map<string, CacheMemory*>::const_iterator it = m_caches.begin(); it != m_caches.end(); it++) { (*it).second->printStats(out); diff --git a/src/mem/slicc/ast/PeekStatementAST.py b/src/mem/slicc/ast/PeekStatementAST.py index 5186bf0d5..b63ce6516 100644 --- a/src/mem/slicc/ast/PeekStatementAST.py +++ b/src/mem/slicc/ast/PeekStatementAST.py @@ -29,8 +29,8 @@ from slicc.ast.StatementAST import StatementAST from slicc.symbols import Var class PeekStatementAST(StatementAST): - def __init__(self, slicc, queue_name, type_ast, statements, method): - super(PeekStatementAST, self).__init__(slicc) + def __init__(self, slicc, queue_name, type_ast, pairs, statements, method): + super(PeekStatementAST, self).__init__(slicc, pairs) self.queue_name = queue_name self.type_ast = type_ast @@ -63,6 +63,17 @@ class PeekStatementAST(StatementAST): in_msg_ptr = dynamic_cast<const $mtid *>(($qcode).${{self.method}}()); assert(in_msg_ptr != NULL); ''') + if self.pairs.has_key("block_on"): + address_field = self.pairs['block_on'] + code(''' + if ( (m_is_blocking == true) && + (m_block_map.count(in_msg_ptr->m_$address_field) == 1) ) { + if (m_block_map[in_msg_ptr->m_$address_field] != &$qcode) { + $qcode.delayHead(); + continue; + } + } + ''') # The other statements self.statements.generate(code, return_type) diff --git a/src/mem/slicc/parser.py b/src/mem/slicc/parser.py index 6c3f45629..226106678 100644 --- a/src/mem/slicc/parser.py +++ b/src/mem/slicc/parser.py @@ -514,8 +514,8 @@ class SLICC(Grammar): p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[6], p[8]) def p_statement__peek(self, p): - "statement : PEEK '(' var ',' type ')' statements" - p[0] = ast.PeekStatementAST(self, p[3], p[5], p[7], "peek") + "statement : PEEK '(' var ',' type pairs ')' statements" + p[0] = ast.PeekStatementAST(self, p[3], p[5], p[6], p[8], "peek") def p_statement__copy_head(self, p): "statement : COPY_HEAD '(' var ',' var pairs ')' SEMI" diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index 008438869..73c3fe720 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -185,11 +185,10 @@ public: void print(ostream& out) const; void printConfig(ostream& out) const; void wakeup(); - void set_atomic(Address addr); - void started_writes(); - void clear_atomic(); void printStats(ostream& out) const { s_profiler.dumpStats(out); } void clearStats() { s_profiler.clearStats(); } + void blockOnQueue(Address addr, MessageBuffer* port); + void unblock(Address addr); private: ''') @@ -198,17 +197,6 @@ private: for param in self.config_parameters: code('int m_${{param.ident}};') - if self.ident == "L1Cache": - code(''' -int servicing_atomic; -bool started_receiving_writes; -Address locked_read_request1; -Address locked_read_request2; -Address locked_read_request3; -Address locked_read_request4; -int read_counter; -''') - code(''' int m_number_of_TBEs; @@ -222,6 +210,8 @@ map< string, string > m_cfg; NodeID m_version; Network* m_net_ptr; MachineID m_machineID; +bool m_is_blocking; +map< Address, MessageBuffer* > m_block_map; ${ident}_Profiler s_profiler; static int m_num_controllers; // Internal functions @@ -298,16 +288,6 @@ $c_ident::$c_ident(const string &name) { ''') code.indent() - if self.ident == "L1Cache": - code(''' -servicing_atomic = 0; -started_receiving_writes = false; -locked_read_request1 = Address(-1); -locked_read_request2 = Address(-1); -locked_read_request3 = Address(-1); -locked_read_request4 = Address(-1); -read_counter = 0; -''') code('m_num_controllers++;') for var in self.objects: @@ -517,6 +497,17 @@ const MachineType $c_ident::getMachineType() const{ return MachineType_${ident}; } +void $c_ident::blockOnQueue(Address addr, MessageBuffer* port) { + m_is_blocking = true; + m_block_map[addr] = port; +} +void $c_ident::unblock(Address addr) { + m_block_map.erase(addr); + if (m_block_map.size() == 0) { + m_is_blocking = false; + } +} + void $c_ident::print(ostream& out) const { out << "[$c_ident " << m_version << "]"; } void $c_ident::printConfig(ostream& out) const { @@ -582,144 +573,12 @@ void ${ident}_Controller::wakeup() # InPorts # - # Find the position of the mandatory queue in the vector so - # that we can print it out first - - mandatory_q = None - if self.ident == "L1Cache": - for i,port in enumerate(self.in_ports): - assert "c_code_in_port" in port - if str(port).find("mandatoryQueue_in") >= 0: - assert mandatory_q is None - mandatory_q = port - - assert mandatory_q is not None - - # print out the mandatory queue here - port = mandatory_q - code('// ${ident}InPort $port') - output = port["c_code_in_port"] - - pos = output.find("TransitionResult result = doTransition((L1Cache_mandatory_request_type_to_event(((*in_msg_ptr)).m_Type)), L1Cache_getState(addr), addr);") - assert pos >= 0 - atomics_string = ''' -if ((((*in_msg_ptr)).m_Type) == CacheRequestType_ATOMIC) { - if (servicing_atomic == 0) { - if (locked_read_request1 == Address(-1)) { - assert(read_counter == 0); - locked_read_request1 = addr; - assert(read_counter == 0); - read_counter++; - } - else if (addr == locked_read_request1) { - ; // do nothing - } - else { - assert(0); // should never be here if servicing one request at a time - } - } - else if (!started_receiving_writes) { - if (servicing_atomic == 1) { - if (locked_read_request2 == Address(-1)) { - assert(locked_read_request1 != Address(-1)); - assert(read_counter == 1); - locked_read_request2 = addr; - assert(read_counter == 1); - read_counter++; - } - else if (addr == locked_read_request2) { - ; // do nothing - } - else { - assert(0); // should never be here if servicing one request at a time - } - } - else if (servicing_atomic == 2) { - if (locked_read_request3 == Address(-1)) { - assert(locked_read_request1 != Address(-1)); - assert(locked_read_request2 != Address(-1)); - assert(read_counter == 1); - locked_read_request3 = addr; - assert(read_counter == 2); - read_counter++; - } - else if (addr == locked_read_request3) { - ; // do nothing - } - else { - assert(0); // should never be here if servicing one request at a time - } - } - else if (servicing_atomic == 3) { - if (locked_read_request4 == Address(-1)) { - assert(locked_read_request1 != Address(-1)); - assert(locked_read_request2 != Address(-1)); - assert(locked_read_request3 != Address(-1)); - assert(read_counter == 1); - locked_read_request4 = addr; - assert(read_counter == 3); - read_counter++; - } - else if (addr == locked_read_request4) { - ; // do nothing - } - else { - assert(0); // should never be here if servicing one request at a time - } - } - else { - assert(0); - } - } -} -else { - if (servicing_atomic > 0) { - // reset - servicing_atomic = 0; - read_counter = 0; - started_receiving_writes = false; - locked_read_request1 = Address(-1); - locked_read_request2 = Address(-1); - locked_read_request3 = Address(-1); - locked_read_request4 = Address(-1); - } -} -''' - - output = output[:pos] + atomics_string + output[pos:] - code('$output') - for port in self.in_ports: - # don't print out mandatory queue twice - if port == mandatory_q: - continue - - if ident == "L1Cache": - if str(port).find("forwardRequestNetwork_in") >= 0: - code(''' -bool postpone = false; -if ((((*m_L1Cache_forwardToCache_ptr)).isReady())) { - const RequestMsg* in_msg_ptr; - in_msg_ptr = dynamic_cast<const RequestMsg*>(((*m_L1Cache_forwardToCache_ptr)).peek()); - if ((((servicing_atomic == 1) && (locked_read_request1 == ((*in_msg_ptr)).m_Address)) || - ((servicing_atomic == 2) && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address)) || - ((servicing_atomic == 3) && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address || locked_read_request3 == ((*in_msg_ptr)).m_Address)) || - ((servicing_atomic == 4) && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address || locked_read_request3 == ((*in_msg_ptr)).m_Address || locked_read_request1 == ((*in_msg_ptr)).m_Address)))) { - postpone = true; - } -} -if (!postpone) { -''') code.indent() code('// ${ident}InPort $port') code('${{port["c_code_in_port"]}}') code.dedent() - if ident == "L1Cache": - if str(port).find("forwardRequestNetwork_in") >= 0: - code.dedent() - code('}') - code.indent() code('') code.dedent() @@ -730,52 +589,6 @@ if (!postpone) { } ''') - if self.ident == "L1Cache": - code(''' -void ${ident}_Controller::set_atomic(Address addr) -{ - servicing_atomic++; -} - -void ${ident}_Controller::started_writes() -{ - started_receiving_writes = true; -} - -void ${ident}_Controller::clear_atomic() -{ - assert(servicing_atomic > 0); - read_counter--; - servicing_atomic--; - if (read_counter == 0) { - servicing_atomic = 0; - started_receiving_writes = false; - locked_read_request1 = Address(-1); - locked_read_request2 = Address(-1); - locked_read_request3 = Address(-1); - locked_read_request4 = Address(-1); - } -} -''') - else: - code(''' -void ${ident}_Controller::started_writes() -{ - assert(0); -} - -void ${ident}_Controller::set_atomic(Address addr) -{ - assert(0); -} - -void ${ident}_Controller::clear_atomic() -{ - assert(0); -} -''') - - code.write(path, "%s_Wakeup.cc" % self.ident) def printCSwitch(self, path): diff --git a/src/mem/slicc/symbols/Type.py b/src/mem/slicc/symbols/Type.py index 2541296dc..bafc6ea9e 100644 --- a/src/mem/slicc/symbols/Type.py +++ b/src/mem/slicc/symbols/Type.py @@ -430,8 +430,10 @@ enum ${{self.c_ident}} { # For each field for i,(ident,enum) in enumerate(self.enums.iteritems()): desc = enum.get("desc", "No description avaliable") - init = ' = %s_FIRST' % self.c_ident if i == 0 else '' - + if i == 0: + init = ' = %s_FIRST' % self.c_ident + else: + init = '' code('${{self.c_ident}}_${{enum.ident}}$init, /**< $desc */') code.dedent() code(''' |