From 5d8a669539a142ece820cf0c82722ea1c755d7cd Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:33 -0800 Subject: Resurrection of the CMP token protocol to GEM5 --- src/mem/protocol/MOESI_CMP_token-dir.sm | 866 +++++++++++++++++++++++++++++--- 1 file changed, 797 insertions(+), 69 deletions(-) (limited to 'src/mem/protocol/MOESI_CMP_token-dir.sm') diff --git a/src/mem/protocol/MOESI_CMP_token-dir.sm b/src/mem/protocol/MOESI_CMP_token-dir.sm index 1592fd123..7925a8fe0 100644 --- a/src/mem/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/protocol/MOESI_CMP_token-dir.sm @@ -32,14 +32,23 @@ */ -machine(Directory, "Token protocol") { - - MessageBuffer requestFromDir, network="To", virtual_network="1", ordered="false"; - MessageBuffer responseFromDir, network="To", virtual_network="2", ordered="false"; - - MessageBuffer persistentToDir, network="From", virtual_network="3", ordered="true"; - MessageBuffer requestToDir, network="From", virtual_network="1", ordered="false"; - MessageBuffer responseToDir, network="From", virtual_network="2", ordered="false"; +machine(Directory, "Token protocol") + : int directory_latency, + int l2_select_low_bit, + int l2_select_num_bits, + bool distributed_persistent, + int fixed_timeout_latency +{ + + MessageBuffer dmaResponseFromDir, network="To", virtual_network="0", ordered="true"; + MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; + MessageBuffer persistentFromDir, network="To", virtual_network="2", ordered="true"; + MessageBuffer requestFromDir, network="To", virtual_network="4", ordered="false"; + + MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false"; + MessageBuffer persistentToDir, network="From", virtual_network="2", ordered="true"; + MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; + MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; // STATES enumeration(State, desc="Directory states", default="Directory_State_O") { @@ -47,6 +56,24 @@ machine(Directory, "Token protocol") { O, desc="Owner"; NO, desc="Not Owner"; L, desc="Locked"; + + // Memory wait states - can block all messages including persistent requests + O_W, desc="transitioning to Owner, waiting for memory write"; + L_W, desc="transitioning to Locked, waiting for memory read"; + DR_L_W, desc="transitioning to Locked underneath a DMA read, waiting for memory data"; + NO_W, desc="transitioning to Not Owner, waiting for memory read"; + O_DW_W, desc="transitioning to Owner, waiting for memory before DMA ack"; + O_DR_W, desc="transitioning to Owner, waiting for memory before DMA data"; + + // DMA request transient states - must respond to persistent requests + O_DW, desc="issued GETX for DMA write, waiting for all tokens"; + NO_DW, desc="issued GETX for DMA write, waiting for all tokens"; + NO_DR, desc="issued GETS for DMA read, waiting for data"; + + // DMA request in progress - competing with a CPU persistent request + DW_L, desc="issued GETX for DMA write, CPU persistent request must complete first"; + DR_L, desc="issued GETS for DMA read, CPU persistent request must complete first"; + } // Events @@ -55,9 +82,23 @@ machine(Directory, "Token protocol") { GETS, desc="A GETS arrives"; Lockdown, desc="A lockdown request arrives"; Unlockdown, desc="An un-lockdown request arrives"; + Own_Lock_or_Unlock, desc="own lock or unlock"; Data_Owner, desc="Data arrive"; + Data_All_Tokens, desc="Data and all tokens"; Ack_Owner, desc="Owner token arrived without data because it was clean"; + Ack_Owner_All_Tokens, desc="All tokens including owner arrived without data because it was clean"; Tokens, desc="Tokens arrive"; + Ack_All_Tokens, desc="All_Tokens arrive"; + Request_Timeout, desc="A DMA request has timed out"; + + // Memory Controller + Memory_Data, desc="Fetched data from memory arrives"; + Memory_Ack, desc="Writeback Ack from memory arrives"; + + // DMA requests + DMA_READ, desc="A DMA Read memory request"; + DMA_WRITE, desc="A DMA Write memory request"; + DMA_WRITE_All_Tokens, desc="A DMA Write memory request, directory has all tokens"; } // TYPES @@ -73,7 +114,7 @@ machine(Directory, "Token protocol") { // is 'soft state' that does not need to be correct (as long as // you're eventually willing to resort to broadcast.) - Set Owner, desc="Probable Owner of the line. More accurately, the set of processors who need to see a GetS or GetO. We use a Set for convenience, but only one bit is set at a time."; + Set Owner, desc="Probable Owner of the line. More accurately, the set of processors who need to see a GetS or GetO. We use a Set for convenience, but only one bit is set at a time."; Set Sharers, desc="Probable sharers of the line. More accurately, the set of processors who need to see a GetX"; } @@ -82,23 +123,70 @@ machine(Directory, "Token protocol") { bool isPresent(Address); } + external_type(MemoryControl, inport="yes", outport="yes") { + + } + + external_type(PersistentTable) { + void persistentRequestLock(Address, MachineID, AccessType); + void persistentRequestUnlock(Address, MachineID); + bool okToIssueStarving(Address, MachineID); + MachineID findSmallest(Address); + AccessType typeOfSmallest(Address); + void markEntries(Address); + bool isLocked(Address); + int countStarvingForAddress(Address); + int countReadStarvingForAddress(Address); + } + + // TBE entries for DMA requests + structure(TBE, desc="TBE entries for outstanding DMA requests") { + Address PhysicalAddress, desc="physical address"; + State TBEState, desc="Transient State"; + DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory"; + DataBlock DataBlk, desc="The current view of system memory"; + int Len, desc="..."; + MachineID DmaRequestor, desc="DMA requestor"; + bool WentPersistent, desc="Did the DMA request require a persistent request"; + } + + external_type(TBETable) { + TBE lookup(Address); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } // ** OBJECTS ** - DirectoryMemory directory, constructor_hack="i"; + DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])'; + + MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])'; - PersistentTable persistentTable, constructor_hack="i"; + PersistentTable persistentTable; + TimerTable reissueTimerTable; + + TBETable TBEs, template_hack=""; + + bool starving, default="false"; State getState(Address addr) { - return directory[addr].DirectoryState; + if (TBEs.isPresent(addr)) { + return TBEs[addr].TBEState; + } else { + return directory[addr].DirectoryState; + } } void setState(Address addr, State state) { + if (TBEs.isPresent(addr)) { + TBEs[addr].TBEState := state; + } directory[addr].DirectoryState := state; if (state == State:L) { assert(directory[addr].Tokens == 0); - } + } // We have one or zero owners assert((directory[addr].Owner.count() == 0) || (directory[addr].Owner.count() == 1)); @@ -112,19 +200,90 @@ machine(Directory, "Token protocol") { // assert(directory[addr].Tokens >= (max_tokens() / 2)); // Only mostly true; this might not always hold } } + + bool okToIssueStarving(Address addr, MachineID machinID) { + return persistentTable.okToIssueStarving(addr, machineID); + } + + void markPersistentEntries(Address addr) { + persistentTable.markEntries(addr); + } // ** OUT_PORTS ** out_port(responseNetwork_out, ResponseMsg, responseFromDir); + out_port(persistentNetwork_out, PersistentMsg, persistentFromDir); out_port(requestNetwork_out, RequestMsg, requestFromDir); + out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaResponseFromDir); + + // + // Memory buffer for memory controller to DIMM communication + // + out_port(memQueue_out, MemoryMsg, memBuffer); // ** IN_PORTS ** + + // off-chip memory request/response is done + in_port(memQueue_in, MemoryMsg, memBuffer) { + if (memQueue_in.isReady()) { + peek(memQueue_in, MemoryMsg) { + if (in_msg.Type == MemoryRequestType:MEMORY_READ) { + trigger(Event:Memory_Data, in_msg.Address); + } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) { + trigger(Event:Memory_Ack, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } + } + } + + // Reissue Timer + in_port(reissueTimerTable_in, Address, reissueTimerTable) { + if (reissueTimerTable_in.isReady()) { + trigger(Event:Request_Timeout, reissueTimerTable.readyAddress()); + } + } + + in_port(responseNetwork_in, ResponseMsg, responseToDir) { + if (responseNetwork_in.isReady()) { + peek(responseNetwork_in, ResponseMsg) { + assert(in_msg.Destination.isElement(machineID)); + if (directory[in_msg.Address].Tokens + in_msg.Tokens == max_tokens()) { + if ((in_msg.Type == CoherenceResponseType:DATA_OWNER) || + (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { + trigger(Event:Data_All_Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { + trigger(Event:Ack_Owner_All_Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:Ack_All_Tokens, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } else { + if (in_msg.Type == CoherenceResponseType:DATA_OWNER) { + trigger(Event:Data_Owner, in_msg.Address); + } else if ((in_msg.Type == CoherenceResponseType:ACK) || + (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { + trigger(Event:Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { + trigger(Event:Ack_Owner, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } + } + } + } in_port(persistentNetwork_in, PersistentMsg, persistentToDir) { if (persistentNetwork_in.isReady()) { peek(persistentNetwork_in, PersistentMsg) { assert(in_msg.Destination.isElement(machineID)); - if (distributedPersistentEnabled()) { + if (distributed_persistent) { // Apply the lockdown or unlockdown message to the table if (in_msg.Type == PersistentRequestType:GETX_PERSISTENT) { persistentTable.persistentRequestLock(in_msg.Address, in_msg.Requestor, AccessType:Write); @@ -173,19 +332,18 @@ machine(Directory, "Token protocol") { } } - in_port(responseNetwork_in, ResponseMsg, responseToDir) { - if (responseNetwork_in.isReady()) { - peek(responseNetwork_in, ResponseMsg) { - assert(in_msg.Destination.isElement(machineID)); - if (in_msg.Type == CoherenceResponseType:DATA_OWNER) { - trigger(Event:Data_Owner, in_msg.Address); - } else if ((in_msg.Type == CoherenceResponseType:ACK) || - (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { - trigger(Event:Tokens, in_msg.Address); - } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { - trigger(Event:Ack_Owner, in_msg.Address); + in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, DMARequestMsg) { + if (in_msg.Type == DMARequestType:READ) { + trigger(Event:DMA_READ, in_msg.LineAddress); + } else if (in_msg.Type == DMARequestType:WRITE) { + if (directory[in_msg.LineAddress].Tokens == max_tokens()) { + trigger(Event:DMA_WRITE_All_Tokens, in_msg.LineAddress); + } else { + trigger(Event:DMA_WRITE, in_msg.LineAddress); + } } else { - DEBUG_EXPR(in_msg.Type); error("Invalid message"); } } @@ -199,7 +357,7 @@ machine(Directory, "Token protocol") { if (directory[address].Tokens > 0) { peek(requestNetwork_in, RequestMsg) { // enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_CACHE_LATENCY") {// FIXME? - enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_LATENCY") {// FIXME? + enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {// FIXME? out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -213,11 +371,151 @@ machine(Directory, "Token protocol") { } } + action(px_tryIssuingPersistentGETXRequest, "px", desc="...") { + if (okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := PersistentRequestType:GETX_PERSISTENT; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + out_msg.Destination.broadcast(MachineType:L1Cache); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + markPersistentEntries(address); + starving := true; + + TBEs[address].WentPersistent := true; + + // Do not schedule a wakeup, a persistent requests will always complete + } else { + + // We'd like to issue a persistent request, but are not allowed + // to issue a P.R. right now. This, we do not increment the + // IssueCount. + + // Set a wakeup timer + reissueTimerTable.set(address, 10); + } + } + + action(bw_broadcastWrite, "bw", desc="Broadcast GETX if we need tokens") { + peek(dmaRequestQueue_in, DMARequestMsg) { + // + // Assser that we only send message if we don't already have all the tokens + // + assert(directory[address].Tokens != max_tokens()); + enqueue(requestNetwork_out, RequestMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETX; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + + // + // Since only one chip, assuming all L1 caches are local + // + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.RetryNum := 0; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + } + } + + action(ps_tryIssuingPersistentGETSRequest, "ps", desc="...") { + if (okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := PersistentRequestType:GETS_PERSISTENT; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + out_msg.Destination.broadcast(MachineType:L1Cache); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + markPersistentEntries(address); + starving := true; + + TBEs[address].WentPersistent := true; + + // Do not schedule a wakeup, a persistent requests will always complete + } else { + + // We'd like to issue a persistent request, but are not allowed + // to issue a P.R. right now. This, we do not increment the + // IssueCount. + + // Set a wakeup timer + reissueTimerTable.set(address, 10); + } + } + + action(br_broadcastRead, "br", desc="Broadcast GETS for data") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(requestNetwork_out, RequestMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETS; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + + // + // Since only one chip, assuming all L1 caches are local + // + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.RetryNum := 0; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + } + } + action(aa_sendTokensToStarver, "\a", desc="Send tokens to starver") { // Only send a message if we have tokens to send if (directory[address].Tokens > 0) { // enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_CACHE_LATENCY") {// FIXME? - enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_LATENCY") {// FIXME? + enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {// FIXME? out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -230,14 +528,14 @@ machine(Directory, "Token protocol") { } } - action(d_sendDataWithAllTokens, "d", desc="Send data and tokens to requestor") { - peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { + action(d_sendMemoryDataWithAllTokens, "d", desc="Send data and tokens to requestor") { + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:Directory; - out_msg.Destination.add(in_msg.Requestor); + out_msg.Destination.add(in_msg.OriginalRequestorMachId); assert(directory[address].Tokens > 0); out_msg.Tokens := directory[in_msg.Address].Tokens; out_msg.DataBlk := directory[in_msg.Address].DataBlk; @@ -249,21 +547,140 @@ machine(Directory, "Token protocol") { } action(dd_sendDataWithAllTokensToStarver, "\d", desc="Send data and tokens to starver") { - enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { - out_msg.Address := address; - out_msg.Type := CoherenceResponseType:DATA_OWNER; - out_msg.Sender := machineID; - out_msg.SenderMachine := MachineType:Directory; - out_msg.Destination.add(persistentTable.findSmallest(address)); - assert(directory[address].Tokens > 0); - out_msg.Tokens := directory[address].Tokens; - out_msg.DataBlk := directory[address].DataBlk; - out_msg.Dirty := false; - out_msg.MessageSize := MessageSizeType:Response_Data; + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA_OWNER; + out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:Directory; + out_msg.Destination.add(persistentTable.findSmallest(address)); + assert(directory[address].Tokens > 0); + out_msg.Tokens := directory[address].Tokens; + out_msg.DataBlk := directory[address].DataBlk; + out_msg.Dirty := false; + out_msg.MessageSize := MessageSizeType:Response_Data; + } } directory[address].Tokens := 0; } + action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { + peek(requestNetwork_in, RequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } + + action(fd_memoryDma, "fd", desc="Queue off-chip fetch request") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } + + action(lq_queueMemoryWbRequest, "lq", desc="Write data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + DEBUG_EXPR(out_msg); + } + } + + action(ld_queueMemoryDmaWriteFromTbe, "ld", desc="Write DMA data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + // first, initialize the data blk to the current version of system memory + out_msg.DataBlk := TBEs[address].DataBlk; + // then add the dma write data + out_msg.DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + DEBUG_EXPR(out_msg); + } + } + + action(lr_queueMemoryDmaReadWriteback, "lr", desc="Write DMA data from read to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + // first, initialize the data blk to the current version of system memory + out_msg.DataBlk := TBEs[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + + action(vd_allocateDmaRequestInTBE, "vd", desc="Record Data in TBE") { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + TBEs[address].DmaDataBlk := in_msg.DataBlk; + TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; + TBEs[address].Len := in_msg.Len; + TBEs[address].DmaRequestor := in_msg.Requestor; + TBEs[address].WentPersistent := false; + } + } + + action(s_deallocateTBE, "s", desc="Deallocate TBE") { + + if (TBEs[address].WentPersistent) { + assert(starving == true); + + enqueue(persistentNetwork_out, PersistentMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + out_msg.Destination.broadcast(MachineType:L1Cache); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; + } + starving := false; + } + + TBEs.deallocate(address); + } + + action(rd_recordDataInTbe, "rd", desc="Record data in TBE") { + peek(responseNetwork_in, ResponseMsg) { + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + + action(cd_writeCleanDataToTbe, "cd", desc="Write clean memory data to TBE") { + TBEs[address].DataBlk := directory[address].DataBlk; + } + + action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") { + directory[address].DataBlk := TBEs[address].DataBlk; + directory[address].DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + } + action(f_incrementTokens, "f", desc="Increment the number of tokens we're tracking") { peek(responseNetwork_in, ResponseMsg) { assert(in_msg.Tokens >= 1); @@ -275,14 +692,34 @@ machine(Directory, "Token protocol") { requestNetwork_in.dequeue(); } + action(z_recycleRequest, "z", desc="Recycle the request queue") { + requestNetwork_in.recycle(); + } + action(k_popIncomingResponseQueue, "k", desc="Pop incoming response queue") { responseNetwork_in.dequeue(); } + action(kz_recycleResponse, "kz", desc="Recycle incoming response queue") { + responseNetwork_in.recycle(); + } + action(l_popIncomingPersistentQueue, "l", desc="Pop incoming persistent queue") { persistentNetwork_in.dequeue(); } + action(p_popDmaRequestQueue, "pd", desc="pop dma request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(y_recycleDmaRequestQueue, "y", desc="recycle dma request queue") { + dmaRequestQueue_in.recycle(); + } + + action(l_popMemQueue, "q", desc="Pop off-chip request queue") { + memQueue_in.dequeue(); + } + action(m_writeDataToMemory, "m", desc="Write dirty writeback to memory") { peek(responseNetwork_in, ResponseMsg) { directory[in_msg.Address].DataBlk := in_msg.DataBlk; @@ -291,18 +728,15 @@ machine(Directory, "Token protocol") { } } - action(n_checkIncomingMsg, "n", desc="Check incoming token message") { + action(n_checkData, "n", desc="Check incoming clean data message") { peek(responseNetwork_in, ResponseMsg) { - assert(in_msg.Type == CoherenceResponseType:ACK_OWNER); - assert(in_msg.Dirty == false); - assert(in_msg.MessageSize == MessageSizeType:Writeback_Control); assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk); } } action(r_bounceResponse, "r", desc="Bounce response to starving processor") { peek(responseNetwork_in, ResponseMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -316,7 +750,20 @@ machine(Directory, "Token protocol") { } } - action(s_bounceDatalessOwnerToken, "s", desc="Bounce clean owner token to starving processor") { + action(st_scheduleTimeout, "st", desc="Schedule Timeout") { + // + // currently only support a fixed timeout latency + // + reissueTimerTable.set(address, fixed_timeout_latency); + } + + action(ut_unsetReissueTimer, "ut", desc="Unset reissue timer.") { + if (reissueTimerTable.isSet(address)) { + reissueTimerTable.unset(address); + } + } + + action(bd_bounceDatalessOwnerToken, "bd", desc="Bounce clean owner token to starving processor") { peek(responseNetwork_in, ResponseMsg) { assert(in_msg.Type == CoherenceResponseType:ACK_OWNER); assert(in_msg.Dirty == false); @@ -331,7 +778,7 @@ machine(Directory, "Token protocol") { // Bounce the message, but "re-associate" the data and the owner // token. In essence we're converting an ACK_OWNER message to a // DATA_OWNER message, keeping the number of tokens the same. - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -346,53 +793,212 @@ machine(Directory, "Token protocol") { } } + action(da_sendDmaAck, "da", desc="Send Ack to DMA controller") { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:ACK; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + + action(dm_sendMemoryDataToDma, "dm", desc="Send Data to DMA controller from memory") { + peek(memQueue_in, MemoryMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(dd_sendDmaData, "dd", desc="Send Data to DMA controller") { + peek(responseNetwork_in, ResponseMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } // TRANSITIONS - // Trans. from O - transition(O, GETX, NO) { - d_sendDataWithAllTokens; + // + // Trans. from base state O + // the directory has valid data + // + transition(O, GETX, NO_W) { + qf_queueMemoryFetchRequest; j_popIncomingRequestQueue; } - transition(O, GETS, NO) { - d_sendDataWithAllTokens; + transition(O, DMA_WRITE, O_DW) { + vd_allocateDmaRequestInTBE; + bw_broadcastWrite; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + + transition(O, DMA_WRITE_All_Tokens, O_DW_W) { + vd_allocateDmaRequestInTBE; + cd_writeCleanDataToTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + p_popDmaRequestQueue; + } + + transition(O, GETS, NO_W) { + qf_queueMemoryFetchRequest; j_popIncomingRequestQueue; } - transition(O, Lockdown, L) { - dd_sendDataWithAllTokensToStarver; + transition(O, DMA_READ, O_DR_W) { + vd_allocateDmaRequestInTBE; + fd_memoryDma; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + + transition(O, Lockdown, L_W) { + qf_queueMemoryFetchRequest; + l_popIncomingPersistentQueue; + } + + transition(O, {Tokens, Ack_All_Tokens}) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + transition(O, {Data_Owner, Data_All_Tokens}) { + n_checkData; + f_incrementTokens; + k_popIncomingResponseQueue; + } + + // + // transitioning to Owner, waiting for memory before DMA ack + // All other events should recycle/stall + // + transition(O_DR_W, Memory_Data, O) { + dm_sendMemoryDataToDma; + ut_unsetReissueTimer; + s_deallocateTBE; + l_popMemQueue; + } + + // + // issued GETX for DMA write, waiting for all tokens + // + transition(O_DW, Tokens) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + transition(O_DW, Data_Owner) { + f_incrementTokens; + rd_recordDataInTbe; + k_popIncomingResponseQueue; + } + + transition(O_DW, Ack_Owner) { + f_incrementTokens; + cd_writeCleanDataToTbe; + k_popIncomingResponseQueue; + } + + transition(O_DW, Lockdown, DW_L) { l_popIncomingPersistentQueue; } - transition(O, Tokens) { + transition({NO_DW, O_DW}, Data_All_Tokens, O_DW_W) { f_incrementTokens; + rd_recordDataInTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; k_popIncomingResponseQueue; } + transition(O_DW, Ack_All_Tokens, O_DW_W) { + f_incrementTokens; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; + k_popIncomingResponseQueue; + } + + transition(O_DW, Ack_Owner_All_Tokens, O_DW_W) { + f_incrementTokens; + cd_writeCleanDataToTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; + k_popIncomingResponseQueue; + } + + transition(O_DW_W, Memory_Ack, O) { + da_sendDmaAck; + s_deallocateTBE; + l_popMemQueue; + } + + // // Trans. from NO + // The direcotry does not have valid data, but may have some tokens + // transition(NO, GETX) { a_sendTokens; j_popIncomingRequestQueue; } + transition(NO, DMA_WRITE, NO_DW) { + vd_allocateDmaRequestInTBE; + bw_broadcastWrite; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + transition(NO, GETS) { j_popIncomingRequestQueue; } + transition(NO, DMA_READ, NO_DR) { + vd_allocateDmaRequestInTBE; + br_broadcastRead; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + transition(NO, Lockdown, L) { aa_sendTokensToStarver; l_popIncomingPersistentQueue; } - transition(NO, Data_Owner, O) { + transition(NO, {Data_Owner, Data_All_Tokens}, O_W) { m_writeDataToMemory; f_incrementTokens; + lq_queueMemoryWbRequest; k_popIncomingResponseQueue; } - transition(NO, Ack_Owner, O) { - n_checkIncomingMsg; + transition(NO, {Ack_Owner, Ack_Owner_All_Tokens}, O) { + n_checkData; f_incrementTokens; k_popIncomingResponseQueue; } @@ -402,34 +1008,156 @@ machine(Directory, "Token protocol") { k_popIncomingResponseQueue; } + transition(NO_W, Memory_Data, NO) { + d_sendMemoryDataWithAllTokens; + l_popMemQueue; + } + + // Trans. from NO_DW + transition(NO_DW, Request_Timeout) { + ut_unsetReissueTimer; + px_tryIssuingPersistentGETXRequest; + } + + transition(NO_DW, Lockdown, DW_L) { + aa_sendTokensToStarver; + l_popIncomingPersistentQueue; + } + + // Note: NO_DW, Data_All_Tokens transition is combined with O_DW + // Note: NO_DW should not receive the action Ack_All_Tokens because the + // directory does not have valid data + + transition(NO_DW, Data_Owner, O_DW) { + f_incrementTokens; + rd_recordDataInTbe; + lq_queueMemoryWbRequest; + k_popIncomingResponseQueue; + } + + transition({NO_DW, NO_DR}, Tokens) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + // Trans. from NO_DR + transition(NO_DR, Request_Timeout) { + ut_unsetReissueTimer; + ps_tryIssuingPersistentGETSRequest; + } + + transition(NO_DR, Lockdown, DR_L) { + aa_sendTokensToStarver; + l_popIncomingPersistentQueue; + } + + transition(NO_DR, {Data_Owner, Data_All_Tokens}, O_W) { + m_writeDataToMemory; + f_incrementTokens; + dd_sendDmaData; + lr_queueMemoryDmaReadWriteback; + ut_unsetReissueTimer; + s_deallocateTBE; + k_popIncomingResponseQueue; + } + // Trans. from L - transition(L, {GETX, GETS}) { + transition({L, DW_L, DR_L}, {GETX, GETS}) { j_popIncomingRequestQueue; } - transition(L, Lockdown) { + transition({L, DW_L, DR_L, L_W, DR_L_W}, Lockdown) { l_popIncomingPersistentQueue; } - // we could change this to write the data to memory and send it cleanly - transition(L, Data_Owner) { + // + // Received data for lockdown blocks + // For blocks with outstanding dma requests to them + // ...we could change this to write the data to memory and send it cleanly + // ...we could also proactively complete our DMA requests + // However, to keep my mind from spinning out-of-control, we won't for now :) + // + transition({DW_L, DR_L, L}, {Data_Owner, Data_All_Tokens}) { r_bounceResponse; k_popIncomingResponseQueue; } - transition(L, Tokens) { + transition({DW_L, DR_L, L}, Tokens) { r_bounceResponse; k_popIncomingResponseQueue; } - transition(L, Ack_Owner) { - s_bounceDatalessOwnerToken; + transition({DW_L, DR_L, L}, {Ack_Owner_All_Tokens, Ack_Owner}) { + bd_bounceDatalessOwnerToken; k_popIncomingResponseQueue; } - transition(L, Unlockdown, NO) { l_popIncomingPersistentQueue; } + transition(L_W, Memory_Data, L) { + dd_sendDataWithAllTokensToStarver; + l_popMemQueue; + } + + transition(DR_L_W, Memory_Data, DR_L) { + dd_sendDataWithAllTokensToStarver; + l_popMemQueue; + } + + transition(DW_L, {Unlockdown, Own_Lock_or_Unlock}, NO_DW) { + l_popIncomingPersistentQueue; + } + + transition(DR_L_W, {Unlockdown, Own_Lock_or_Unlock}, O_DR_W) { + l_popIncomingPersistentQueue; + } + + transition({DW_L, DR_L_W}, Request_Timeout) { + ut_unsetReissueTimer; + px_tryIssuingPersistentGETXRequest; + } + + transition(DR_L, {Unlockdown, Own_Lock_or_Unlock}, NO_DR) { + l_popIncomingPersistentQueue; + } + + transition(DR_L, Request_Timeout) { + ut_unsetReissueTimer; + ps_tryIssuingPersistentGETSRequest; + } + + transition(O_W, Memory_Ack, O) { + l_popMemQueue; + } + + transition({O, NO, L, O_DW, NO_DW, NO_DR}, Own_Lock_or_Unlock) { + l_popIncomingPersistentQueue; + } + + // Blocked states + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W, O_DW, NO_DW, NO_DR}, {GETX, GETS}) { + z_recycleRequest; + } + + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W, O_DW, NO_DW, NO_DR, L, DW_L, DR_L}, {DMA_READ, DMA_WRITE}) { + y_recycleDmaRequestQueue; + } + + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W}, {Data_Owner, Ack_Owner, Tokens}) { + kz_recycleResponse; + } + + transition({NO_W, O_W}, Lockdown, L_W) { + l_popIncomingPersistentQueue; + } + + transition(O_DR_W, Lockdown, DR_L_W) { + l_popIncomingPersistentQueue; + } + + transition({NO_W, O_W, O_DR_W}, {Unlockdown, Own_Lock_or_Unlock}) { + l_popIncomingPersistentQueue; + } } -- cgit v1.2.3