From cef3c5616358912b45aafac490bf182c1a8def04 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:32 -0800 Subject: ruby: MOESI hammer support for DMA reads and writes --- src/mem/protocol/MOESI_hammer-dir.sm | 517 ++++++++++++++++++++++++++++++----- 1 file changed, 455 insertions(+), 62 deletions(-) (limited to 'src/mem/protocol/MOESI_hammer-dir.sm') diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm index 49efaffb6..b9b001e40 100644 --- a/src/mem/protocol/MOESI_hammer-dir.sm +++ b/src/mem/protocol/MOESI_hammer-dir.sm @@ -39,11 +39,17 @@ machine(Directory, "AMD Hammer-like protocol") MessageBuffer forwardFromDir, network="To", virtual_network="2", ordered="false"; MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; - //MessageBuffer dmaRequestFromDir, network="To", virtual_network="4", ordered="true"; + // + // For a finite buffered network, note that the DMA response network only + // works at this relatively higher numbered (lower priority) virtual network + // because the trigger queue decouples cache responses from DMA responses. + // + MessageBuffer dmaResponseFromDir, network="To", virtual_network="4", ordered="true"; - MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; MessageBuffer unblockToDir, network="From", virtual_network="0", ordered="false"; - //MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; + MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false"; + MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; + MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; // STATES enumeration(State, desc="Directory states", default="Directory_State_E") { @@ -57,6 +63,13 @@ machine(Directory, "AMD Hammer-like protocol") O_B_W, desc="Owner, Blocked, waiting for Dram"; NO_W, desc="Not Owner, waiting for Dram"; O_W, desc="Owner, waiting for Dram"; + NO_DW_B_W, desc="Not Owner, Dma Write waiting for Dram and cache responses"; + NO_DR_B_W, desc="Not Owner, Dma Read waiting for Dram and cache responses"; + NO_DR_B_D, desc="Not Owner, Dma Read waiting for cache responses including dirty data"; + NO_DR_B, desc="Not Owner, Dma Read waiting for cache responses"; + NO_DW_W, desc="Not Owner, Dma Write waiting for Dram"; + O_DR_B_W, desc="Owner, Dma Read waiting for Dram and cache responses"; + O_DR_B, desc="Owner, Dma Read waiting for cache responses"; WB, desc="Blocked on a writeback"; WB_O_W, desc="Blocked on memory write, will go to O"; WB_E_W, desc="Blocked on memory write, will go to E"; @@ -73,9 +86,23 @@ machine(Directory, "AMD Hammer-like protocol") Writeback_Exclusive_Clean, desc="The final part of a PutX (no data, exclusive)"; Writeback_Exclusive_Dirty, desc="The final part of a PutX (data, exclusive)"; + // DMA requests + DMA_READ, desc="A DMA Read memory request"; + DMA_WRITE, desc="A DMA Write memory request"; + // Memory Controller Memory_Data, desc="Fetched data from memory arrives"; Memory_Ack, desc="Writeback Ack from memory arrives"; + + // Cache responses required to handle DMA + Ack, desc="Received an ack message"; + Shared_Ack, desc="Received an ack message, responder has a shared copy"; + Shared_Data, desc="Received a data message, responder has a shared copy"; + Exclusive_Data, desc="Received a data message, responder had an exclusive copy, they gave it to us"; + + // Triggers + All_acks_and_data, desc="Received all required data and message acks"; + All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy"; } // TYPES @@ -100,9 +127,13 @@ machine(Directory, "AMD Hammer-like protocol") Address PhysicalAddress, desc="physical address"; State TBEState, desc="Transient State"; CoherenceResponseType ResponseType, desc="The type for the subsequent response message"; - DataBlock DataBlk, desc="Data to be written (DMA write only)"; + DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory"; + DataBlock DataBlk, desc="The current view of system memory"; int Len, desc="..."; MachineID DmaRequestor, desc="DMA requestor"; + int NumPendingMsgs, desc="Number of pending acks/messages"; + bool CacheDirty, desc="Indicates whether a cache has responded with dirty data"; + bool Sharers, desc="Indicates whether a cache has indicated it is currently a sharer"; } external_type(TBETable) { @@ -135,10 +166,14 @@ machine(Directory, "AMD Hammer-like protocol") directory[addr].DirectoryState := state; } + MessageBuffer triggerQueue, ordered="true"; + // ** OUT_PORTS ** + out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests out_port(forwardNetwork_out, RequestMsg, forwardFromDir); out_port(responseNetwork_out, ResponseMsg, responseFromDir); - out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests + out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaResponseFromDir); + out_port(triggerQueue_out, TriggerMsg, triggerQueue); // // Memory buffer for memory controller to DIMM communication @@ -147,6 +182,21 @@ machine(Directory, "AMD Hammer-like protocol") // ** IN_PORTS ** + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, triggerQueue) { + if (triggerQueue_in.isReady()) { + peek(triggerQueue_in, TriggerMsg) { + if (in_msg.Type == TriggerType:ALL_ACKS) { + trigger(Event:All_acks_and_data, in_msg.Address); + } else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) { + trigger(Event:All_acks_and_data_no_sharers, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + in_port(unblockNetwork_in, ResponseMsg, unblockToDir) { if (unblockNetwork_in.isReady()) { peek(unblockNetwork_in, ResponseMsg) { @@ -167,6 +217,39 @@ machine(Directory, "AMD Hammer-like protocol") } } + // Response Network + in_port(responseToDir_in, ResponseMsg, responseToDir) { + if (responseToDir_in.isReady()) { + peek(responseToDir_in, ResponseMsg) { + if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_SHARED) { + trigger(Event:Shared_Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA_SHARED) { + trigger(Event:Shared_Data, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) { + trigger(Event:Exclusive_Data, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + + in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, DMARequestMsg) { + if (in_msg.Type == DMARequestType:READ) { + trigger(Event:DMA_READ, in_msg.LineAddress); + } else if (in_msg.Type == DMARequestType:WRITE) { + trigger(Event:DMA_WRITE, in_msg.LineAddress); + } else { + error("Invalid message"); + } + } + } + } + in_port(requestQueue_in, RequestMsg, requestToDir) { if (requestQueue_in.isReady()) { peek(requestQueue_in, RequestMsg) { @@ -233,10 +316,61 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(vd_allocateDmaRequestInTBE, "vd", desc="Record Data in TBE") { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + TBEs[address].DmaDataBlk := in_msg.DataBlk; + TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; + TBEs[address].Len := in_msg.Len; + TBEs[address].DmaRequestor := in_msg.Requestor; + TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; + // + // One ack for each last-level cache + // + TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); + // + // Assume initially that the caches store a clean copy and that memory + // will provide the data + // + TBEs[address].CacheDirty := false; + } + } + action(w_deallocateTBE, "w", desc="Deallocate TBE") { TBEs.deallocate(address); } + action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") { + peek(responseToDir_in, ResponseMsg) { + assert(in_msg.Acks > 0); + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + // + // Note that cache data responses will have an ack count of 2. However, + // directory DMA requests must wait for acks from all LLC caches, so + // only decrement by 1. + // + TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - 1; + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + } + } + + action(n_popResponseQueue, "n", desc="Pop response queue") { + responseToDir_in.dequeue(); + } + + action(o_checkForCompletion, "o", desc="Check if we have received all the messages required for completion") { + if (TBEs[address].NumPendingMsgs == 0) { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.Address := address; + if (TBEs[address].Sharers) { + out_msg.Type := TriggerType:ALL_ACKS; + } else { + out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS; + } + } + } + } + action(d_sendData, "d", desc="Send data to requestor") { peek(memQueue_in, MemoryMsg) { enqueue(responseNetwork_out, ResponseMsg, latency="1") { @@ -252,18 +386,66 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(dr_sendDmaData, "dr", desc="Send Data to DMA controller from memory") { + peek(memQueue_in, MemoryMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(dt_sendDmaDataFromTbe, "dt", desc="Send Data to DMA controller from tbe") { + peek(triggerQueue_in, TriggerMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(da_sendDmaAck, "da", desc="Send Ack to DMA controller") { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:ACK; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + action(rx_recordExclusiveInTBE, "rx", desc="Record Exclusive in TBE") { peek(requestQueue_in, RequestMsg) { TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; } } - action(r_recordDataInTBE, "r", desc="Record Data in TBE") { + action(r_recordDataInTBE, "rt", desc="Record Data in TBE") { peek(requestQueue_in, RequestMsg) { TBEs[address].ResponseType := CoherenceResponseType:DATA; } } + action(r_setSharerBit, "r", desc="We saw other sharers") { + TBEs[address].Sharers := true; + } + action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { peek(requestQueue_in, RequestMsg) { enqueue(memQueue_out, MemoryMsg, latency="1") { @@ -272,56 +454,25 @@ machine(Directory, "AMD Hammer-like protocol") out_msg.Sender := machineID; out_msg.OriginalRequestorMachId := in_msg.Requestor; out_msg.MessageSize := in_msg.MessageSize; - out_msg.DataBlk := directory[in_msg.Address].DataBlk; + out_msg.DataBlk := directory[address].DataBlk; DEBUG_EXPR(out_msg); } } } -// action(qx_queueMemoryFetchExclusiveRequest, "xf", desc="Queue off-chip fetch request") { -// peek(requestQueue_in, RequestMsg) { -// enqueue(memQueue_out, MemoryMsg, latency=memory_request_latency) { -// out_msg.Address := address; -// out_msg.Type := MemoryRequestType:MEMORY_READ; -// out_msg.ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; -// out_msg.Sender := machineID; -// out_msg.OriginalRequestorMachId := in_msg.Requestor; -// out_msg.MessageSize := in_msg.MessageSize; -// out_msg.DataBlk := directory[in_msg.Address].DataBlk; -// DEBUG_EXPR(out_msg); -// } -// } -// } - -// action(d_sendData, "d", desc="Send data to requestor") { -// peek(requestQueue_in, RequestMsg) { -// enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { -// out_msg.Address := address; -// out_msg.Type := CoherenceResponseType:DATA; -// out_msg.Sender := machineID; -// out_msg.Destination.add(in_msg.Requestor); -// out_msg.DataBlk := directory[in_msg.Address].DataBlk; -// out_msg.Dirty := false; // By definition, the block is now clean -// out_msg.Acks := 1; -// out_msg.MessageSize := MessageSizeType:Response_Data; -// } -// } -// } - -// action(dd_sendExclusiveData, "\d", desc="Send exclusive data to requestor") { -// peek(requestQueue_in, RequestMsg) { -// enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { -// out_msg.Address := address; -// out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; -// out_msg.Sender := machineID; -// out_msg.Destination.add(in_msg.Requestor); -// out_msg.DataBlk := directory[in_msg.Address].DataBlk; -// out_msg.Dirty := false; // By definition, the block is now clean -// out_msg.Acks := 1; -// out_msg.MessageSize := MessageSizeType:Response_Data; -// } -// } -// } + action(qd_queueMemoryRequestFromDmaRead, "qd", desc="Queue off-chip fetch request") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } action(f_forwardRequest, "f", desc="Forward requests") { if (getNumberOfLastLevelCaches() > 1) { @@ -338,6 +489,38 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(f_forwardWriteFromDma, "fw", desc="Forward requests") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETX; + // + // Send to all L1 caches, since the requestor is the memory controller + // itself + // + out_msg.Requestor := machineID; + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + } + } + } + + action(f_forwardReadFromDma, "fr", desc="Forward requests") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETS; + // + // Send to all L1 caches, since the requestor is the memory controller + // itself + // + out_msg.Requestor := machineID; + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + } + } + } + action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") { requestQueue_in.dequeue(); } @@ -350,16 +533,52 @@ machine(Directory, "AMD Hammer-like protocol") memQueue_in.dequeue(); } + action(g_popTriggerQueue, "g", desc="Pop trigger queue") { + triggerQueue_in.dequeue(); + } + + action(p_popDmaRequestQueue, "pd", desc="pop dma request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(y_recycleDmaRequestQueue, "y", desc="recycle dma request queue") { + dmaRequestQueue_in.recycle(); + } + + action(r_recordMemoryData, "rd", desc="record data from memory to TBE") { + peek(memQueue_in, MemoryMsg) { + if (TBEs[address].CacheDirty == false) { + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + } + + action(r_recordCacheData, "rc", desc="record data from cache response to TBE") { + peek(responseToDir_in, ResponseMsg) { + TBEs[address].CacheDirty := true; + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + action(l_writeDataToMemory, "l", desc="Write PUTX/PUTO data to memory") { peek(unblockNetwork_in, ResponseMsg) { assert(in_msg.Dirty); assert(in_msg.MessageSize == MessageSizeType:Writeback_Data); - directory[in_msg.Address].DataBlk := in_msg.DataBlk; + directory[address].DataBlk := in_msg.DataBlk; DEBUG_EXPR(in_msg.Address); DEBUG_EXPR(in_msg.DataBlk); } } + action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") { + directory[address].DataBlk := TBEs[address].DataBlk; + directory[address].DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + } + + action(a_assertCacheData, "ac", desc="Assert that a cache provided the data") { + assert(TBEs[address].CacheDirty); + } + action(l_queueMemoryWBRequest, "lq", desc="Write PUTX data to memory") { peek(unblockNetwork_in, ResponseMsg) { enqueue(memQueue_out, MemoryMsg, latency="1") { @@ -370,6 +589,18 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(ld_queueMemoryDmaWrite, "ld", desc="Write DMA data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + // first, initialize the data blk to the current version of system memory + out_msg.DataBlk := TBEs[address].DataBlk; + // then add the dma write data + out_msg.DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + DEBUG_EXPR(out_msg); + } + } + action(ll_checkIncomingWriteback, "\l", desc="Check PUTX/PUTO response message") { peek(unblockNetwork_in, ResponseMsg) { assert(in_msg.Dirty == false); @@ -379,20 +610,17 @@ machine(Directory, "AMD Hammer-like protocol") // implementation. We include the data in the "dataless" // message so we can assert the clean data matches the datablock // in memory - assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk); + assert(directory[address].DataBlk == in_msg.DataBlk); } } - // action(z_stall, "z", desc="Cannot be handled right now.") { - // Special name recognized as do nothing case - // } - action(zz_recycleRequest, "\z", desc="Recycle the request queue") { requestQueue_in.recycle(); } // TRANSITIONS + // Transitions out of E state transition(E, GETX, NO_B_W) { v_allocateTBE; rx_recordExclusiveInTBE; @@ -409,7 +637,14 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } - // + transition(E, DMA_READ, NO_DR_B_W) { + vd_allocateDmaRequestInTBE; + qd_queueMemoryRequestFromDmaRead; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + // Transitions out of O state transition(O, GETX, NO_B_W) { v_allocateTBE; r_recordDataInTBE; @@ -426,7 +661,20 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } - // + transition(O, DMA_READ, O_DR_B_W) { + vd_allocateDmaRequestInTBE; + qd_queueMemoryRequestFromDmaRead; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + transition({E, O, NO}, DMA_WRITE, NO_DW_B_W) { + vd_allocateDmaRequestInTBE; + f_forwardWriteFromDma; + p_popDmaRequestQueue; + } + + // Transitions out of NO state transition(NO, GETX, NO_B) { f_forwardRequest; i_popIncomingRequestQueue; @@ -442,16 +690,33 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } + transition(NO, DMA_READ, NO_DR_B_D) { + vd_allocateDmaRequestInTBE; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + // Nack PUT requests when races cause us to believe we own the data transition({O, E}, PUT) { b_sendWriteBackNack; i_popIncomingRequestQueue; } - // Blocked states - transition({NO_B, O_B, NO_B_W, O_B_W, NO_W, O_W, WB, WB_E_W, WB_O_W}, {GETS, GETX, PUT}) { + // Blocked transient states + transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, + NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, + NO_W, O_W, WB, WB_E_W, WB_O_W}, + {GETS, GETX, PUT}) { zz_recycleRequest; } + transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, + NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, + NO_W, O_W, WB, WB_E_W, WB_O_W}, + {DMA_READ, DMA_WRITE}) { + y_recycleDmaRequestQueue; + } + transition(NO_B, Unblock, NO) { j_popIncomingUnblockQueue; } @@ -466,6 +731,134 @@ machine(Directory, "AMD Hammer-like protocol") l_popMemQueue; } + transition(NO_DR_B_W, Memory_Data, NO_DR_B) { + r_recordMemoryData; + o_checkForCompletion; + l_popMemQueue; + } + + transition(O_DR_B_W, Memory_Data, O_DR_B) { + r_recordMemoryData; + dr_sendDmaData; + o_checkForCompletion; + l_popMemQueue; + } + + transition({NO_DR_B, O_DR_B, NO_DR_B_D, NO_DW_B_W}, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Ack) { + m_decrementNumberOfMessages; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D}, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Shared_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D}, Shared_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Exclusive_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D, NO_DW_B_W}, Exclusive_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B, All_acks_and_data, O) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B_D, All_acks_and_data, O) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(O_DR_B, All_acks_and_data_no_sharers, O) { + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B, All_acks_and_data_no_sharers, E) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B_D, All_acks_and_data_no_sharers, E) { + a_assertCacheData; + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DW_B_W, All_acks_and_data_no_sharers, NO_DW_W) { + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWrite; + g_popTriggerQueue; + } + + transition(NO_DW_W, Memory_Ack, E) { + da_sendDmaAck; + w_deallocateTBE; + l_popMemQueue; + } + transition(O_B_W, Memory_Data, O_B) { d_sendData; w_deallocateTBE; @@ -490,7 +883,7 @@ machine(Directory, "AMD Hammer-like protocol") l_popMemQueue; } - // WB + // WB State Transistions transition(WB, Writeback_Dirty, WB_E_W) { l_writeDataToMemory; l_queueMemoryWBRequest; -- cgit v1.2.3