author | Derek Hower <drh5@cs.wisc.edu> | 2010-01-19 15:48:12 -0600
committer | Derek Hower <drh5@cs.wisc.edu> | 2010-01-19 15:48:12 -0600
commit | 279f179babc9e5663156777c533c06edc91bce9a (patch)
tree | e6718ee514cc81678491b50562ce8c463c0b20fd /src/mem/protocol
parent | 5aa104e072eb20f6aca49b169521b0c2da33c844 (diff)
parent | 295516a590b6e47c9a881f193027447e500c749c (diff)
download | gem5-279f179babc9e5663156777c533c06edc91bce9a.tar.xz
merge
Diffstat (limited to 'src/mem/protocol')
-rw-r--r-- | src/mem/protocol/MI_example-dir.sm | 24
-rw-r--r-- | src/mem/protocol/MI_example-dma.sm | 6
-rw-r--r-- | src/mem/protocol/MI_example-msg.sm | 1
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-L1cache.sm | 353
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-L2cache.sm | 174
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-dir.sm | 866
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-dma.sm | 165
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-msg.sm | 54
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token.slicc | 1
-rw-r--r-- | src/mem/protocol/MOESI_hammer-cache.sm | 1132
-rw-r--r-- | src/mem/protocol/MOESI_hammer-dir.sm | 920
-rw-r--r-- | src/mem/protocol/MOESI_hammer-dma.sm | 165
-rw-r--r-- | src/mem/protocol/MOESI_hammer-msg.sm | 119
-rw-r--r-- | src/mem/protocol/MOESI_hammer.slicc | 5
-rw-r--r-- | src/mem/protocol/RubySlicc_ComponentMapping.sm | 2
-rw-r--r-- | src/mem/protocol/RubySlicc_Util.sm | 1
-rw-r--r-- | src/mem/protocol/SConscript | 78
-rw-r--r-- | src/mem/protocol/SConsopts | 3

18 files changed, 3747 insertions(+), 322 deletions(-)
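One recurring pattern in the MI_example hunks below: the directory stops computing the DMA controller's destination from address bits (`mapAddressToRange` with `dma_select_low_bit`/`dma_select_num_bits`) and instead records the requestor carried in the incoming `DMARequestMsg` in a TBE, then replies to that recorded `MachineID` when memory data returns. The protocol itself is written in SLICC; the following is only a minimal C++ analogue of that bookkeeping pattern, with illustrative stand-in names (`Tbe`, `Directory`, plain `int` machine IDs), not gem5 identifiers:

```cpp
// Sketch (assumed names) of "record the DMA requestor in a TBE":
// allocate on DMA_READ/DMA_WRITE, reply to the stored requestor on
// Memory_Data, then deallocate - mirroring r_allocateTbeForDmaRead,
// dr_sendDMAData and w_deallocateTBE in the SLICC hunks below.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <unordered_map>

using Addr = uint64_t;
using MachineID = int;  // in SLICC this is a {MachineType, version} pair

struct Tbe {
    MachineID dmaRequestor;  // recorded when the DMA request arrives
};

class Directory {
    std::unordered_map<Addr, Tbe> tbes;  // transaction buffer entries
  public:
    // On a DMA request: remember who asked instead of later
    // recomputing the DMA controller from address bits.
    void onDmaRequest(Addr a, MachineID requestor) {
        tbes[a] = Tbe{requestor};
    }
    // On the memory response: route the reply to the recorded
    // requestor and free the TBE.
    MachineID onMemoryData(Addr a) {
        auto it = tbes.find(a);
        assert(it != tbes.end());
        MachineID dest = it->second.dmaRequestor;
        tbes.erase(it);
        return dest;
    }
};

int main() {
    Directory dir;
    dir.onDmaRequest(0x1000, /*requestor=*/3);
    std::cout << "reply to machine " << dir.onMemoryData(0x1000) << "\n";
}
```

The payoff visible in the hunks: the directory no longer needs the `dma_select_*` configuration parameters at all, so they are deleted from the machine declaration.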
diff --git a/src/mem/protocol/MI_example-dir.sm b/src/mem/protocol/MI_example-dir.sm index 17f39af5b..1f64d25df 100644 --- a/src/mem/protocol/MI_example-dir.sm +++ b/src/mem/protocol/MI_example-dir.sm @@ -1,8 +1,6 @@ machine(Directory, "Directory protocol") -: int directory_latency, - int dma_select_low_bit, - int dma_select_num_bits +: int directory_latency { MessageBuffer forwardFromDir, network="To", virtual_network="2", ordered="false"; @@ -74,6 +72,7 @@ machine(Directory, "Directory protocol") State TBEState, desc="Transient State"; DataBlock DataBlk, desc="Data to be written (DMA write only)"; int Len, desc="..."; + MachineID DmaRequestor, desc="DMA requestor"; } external_type(TBETable) { @@ -243,8 +242,7 @@ machine(Directory, "Directory protocol") out_msg.LineAddress := address; out_msg.Type := DMAResponseType:DATA; out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be - out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA, - dma_select_low_bit, dma_select_num_bits)); + out_msg.Destination.add(TBEs[address].DmaRequestor); out_msg.MessageSize := MessageSizeType:Response_Data; } } @@ -259,8 +257,7 @@ machine(Directory, "Directory protocol") out_msg.LineAddress := address; out_msg.Type := DMAResponseType:DATA; out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be - out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA, - dma_select_low_bit, dma_select_num_bits)); + out_msg.Destination.add(TBEs[address].DmaRequestor); out_msg.MessageSize := MessageSizeType:Response_Data; } } @@ -271,8 +268,7 @@ machine(Directory, "Directory protocol") out_msg.PhysicalAddress := address; out_msg.LineAddress := address; out_msg.Type := DMAResponseType:ACK; - out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA, - dma_select_low_bit, dma_select_num_bits)); + out_msg.Destination.add(TBEs[address].DmaRequestor); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } @@ -343,6 +339,14 @@ machine(Directory, "Directory protocol") TBEs[address].DataBlk := in_msg.DataBlk; TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; TBEs[address].Len := in_msg.Len; + TBEs[address].DmaRequestor := in_msg.Requestor; + } + } + + action(r_allocateTbeForDmaRead, "\r", desc="Allocate TBE for DMA Read") { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + TBEs[address].DmaRequestor := in_msg.Requestor; } } @@ -485,6 +489,7 @@ machine(Directory, "Directory protocol") transition(I, DMA_READ, ID) { //dr_sendDMAData; + r_allocateTbeForDmaRead; qf_queueMemoryFetchRequestDMA; p_popIncomingDMARequestQueue; } @@ -492,6 +497,7 @@ machine(Directory, "Directory protocol") transition(ID, Memory_Data, I) { dr_sendDMAData; //p_popIncomingDMARequestQueue; + w_deallocateTBE; l_popMemQueue; } diff --git a/src/mem/protocol/MI_example-dma.sm b/src/mem/protocol/MI_example-dma.sm index e883288df..79c42e719 100644 --- a/src/mem/protocol/MI_example-dma.sm +++ b/src/mem/protocol/MI_example-dma.sm @@ -71,6 +71,7 @@ machine(DMA, "DMA Controller") out_msg.PhysicalAddress := in_msg.PhysicalAddress; out_msg.LineAddress := in_msg.LineAddress; out_msg.Type := DMARequestType:READ; + out_msg.Requestor := machineID; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; out_msg.Destination.add(map_Address_to_Directory(address)); @@ -85,6 +86,7 @@ machine(DMA, "DMA Controller") out_msg.PhysicalAddress := in_msg.PhysicalAddress; 
out_msg.LineAddress := in_msg.LineAddress; out_msg.Type := DMARequestType:WRITE; + out_msg.Requestor := machineID; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; out_msg.Destination.add(map_Address_to_Directory(address)); @@ -113,10 +115,6 @@ machine(DMA, "DMA Controller") dmaResponseQueue_in.dequeue(); } - action(z_stall, "z", desc="dma is busy..stall") { - // do nothing - } - transition(READY, ReadRequest, BUSY_RD) { s_sendReadRequest; p_popRequestQueue; diff --git a/src/mem/protocol/MI_example-msg.sm b/src/mem/protocol/MI_example-msg.sm index d4d557200..3cdb74e49 100644 --- a/src/mem/protocol/MI_example-msg.sm +++ b/src/mem/protocol/MI_example-msg.sm @@ -105,6 +105,7 @@ structure(DMARequestMsg, desc="...", interface="NetworkMessage") { DMARequestType Type, desc="Request type (read/write)"; Address PhysicalAddress, desc="Physical address for this request"; Address LineAddress, desc="Line address for this request"; + MachineID Requestor, desc="Node who initiated the request"; NetDest Destination, desc="Destination"; DataBlock DataBlk, desc="DataBlk attached to this request"; int Len, desc="The length of the request"; diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm index ab58c5c00..3fb4a8862 100644 --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -32,21 +32,32 @@ * */ -machine(L1Cache, "Token protocol") { +machine(L1Cache, "Token protocol") + : int l1_request_latency, + int l1_response_latency, + int l2_select_low_bit, + int l2_select_num_bits, + int N_tokens, + int retry_threshold, + int fixed_timeout_latency, + bool dynamic_timeout_enabled +{ // From this node's L1 cache TO the network - // a local L1 -> this L2 bank, currently ordered with directory forwarded requests - MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false"; + // a local L1 -> this L2 bank - MessageBuffer responseFromL1Cache, network="To", virtual_network="2", ordered="false"; - MessageBuffer persistentFromL1Cache, network="To", virtual_network="3", ordered="true"; + MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false"; + MessageBuffer persistentFromL1Cache, network="To", virtual_network="2", ordered="true"; + // a local L1 -> this L2 bank, currently ordered with directory forwarded requests + MessageBuffer requestFromL1Cache, network="To", virtual_network="4", ordered="false"; + // To this node's L1 cache FROM the network // a L2 bank -> this L1 - MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false"; + MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false"; + MessageBuffer persistentToL1Cache, network="From", virtual_network="2", ordered="true"; // a L2 bank -> this L1 - MessageBuffer responseToL1Cache, network="From", virtual_network="2", ordered="false"; - MessageBuffer persistentToL1Cache, network="From", virtual_network="3", ordered="true"; + MessageBuffer requestToL1Cache, network="From", virtual_network="4", ordered="false"; // STATES enumeration(State, desc="Cache states", default="L1Cache_State_I") { @@ -111,10 +122,6 @@ machine(L1Cache, "Token protocol") { // TYPES - int getRetryThreshold(); - int getFixedTimeoutLatency(); - bool getDynamicTimeoutEnabled(); - // CacheEntry structure(Entry, desc="...", interface="AbstractCacheEntry") { State CacheState, desc="cache state"; @@ -143,7 +150,7 @@ machine(L1Cache, "Token protocol") { external_type(CacheMemory) { bool 
cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); @@ -157,17 +164,28 @@ machine(L1Cache, "Token protocol") { bool isPresent(Address); } + external_type(PersistentTable) { + void persistentRequestLock(Address, MachineID, AccessType); + void persistentRequestUnlock(Address, MachineID); + bool okToIssueStarving(Address, MachineID); + MachineID findSmallest(Address); + AccessType typeOfSmallest(Address); + void markEntries(Address); + bool isLocked(Address); + int countStarvingForAddress(Address); + int countReadStarvingForAddress(Address); + } TBETable L1_TBEs, template_hack="<L1Cache_TBE>"; - CacheMemory L1IcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1I"', abstract_chip_ptr="true"; - CacheMemory L1DcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1D"', abstract_chip_ptr="true"; + CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])'; + CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])'; MessageBuffer mandatoryQueue, ordered="false", abstract_chip_ptr="true"; - Sequencer sequencer, abstract_chip_ptr="true", constructor_hack="i"; + Sequencer sequencer, factory='RubySystem::getSequencer(m_cfg["sequencer"])'; bool starving, default="false"; - PersistentTable persistentTable, constructor_hack="i"; + PersistentTable persistentTable; TimerTable useTimerTable; TimerTable reissueTimerTable; @@ -175,11 +193,11 @@ machine(L1Cache, "Token protocol") { int outstandingPersistentRequests, default="0"; int averageLatencyHysteresis, default="(8)"; // Constant that provides hysteresis for calculated the estimated average - int averageLatencyCounter, default="(500 << (*m_L1Cache_averageLatencyHysteresis_vec[i]))"; + int averageLatencyCounter, default="(500 << (*m_L1Cache_averageLatencyHysteresis_ptr))"; int averageLatencyEstimate() { DEBUG_EXPR( (averageLatencyCounter >> averageLatencyHysteresis) ); - profile_average_latency_estimate( (averageLatencyCounter >> averageLatencyHysteresis) ); + //profile_average_latency_estimate( (averageLatencyCounter >> averageLatencyHysteresis) ); return averageLatencyCounter >> averageLatencyHysteresis; } @@ -366,30 +384,33 @@ machine(L1Cache, "Token protocol") { } } - GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) { - if (machineIDToMachineType(sender) == MachineType:L1Cache) { - return GenericMachineType:L1Cache_wCC; // NOTE direct L1 hits should not call this - } else if (machineIDToMachineType(sender) == MachineType:L2Cache) { - if ( sender == (map_L1CacheMachId_to_L2Cache(addr,machineID))) { - return GenericMachineType:L2Cache; - } else { - return GenericMachineType:L2Cache_wCC; - } - } else { - return ConvertMachToGenericMach(machineIDToMachineType(sender)); - } - } - - bool okToIssueStarving(Address addr) { - return persistentTable.okToIssueStarving(addr); +// GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) { +// if (machineIDToMachineType(sender) == MachineType:L1Cache) { +// return GenericMachineType:L1Cache_wCC; // NOTE direct L1 hits should not call this +// } else if (machineIDToMachineType(sender) == MachineType:L2Cache) { +// +// if (sender == (mapAddressToRange(addr, +// MachineType:L2Cache, +// l2_select_low_bit, +// 
l2_select_num_bits))) { +// +// return GenericMachineType:L2Cache; +// } else { +// return GenericMachineType:L2Cache_wCC; +// } +// } else { +// return ConvertMachToGenericMach(machineIDToMachineType(sender)); +// } +// } + + bool okToIssueStarving(Address addr, MachineID machinID) { + return persistentTable.okToIssueStarving(addr, machineID); } void markPersistentEntries(Address addr) { persistentTable.markEntries(addr); } - MessageBuffer triggerQueue, ordered="false", random="false"; - // ** OUT_PORTS ** out_port(persistentNetwork_out, PersistentMsg, persistentFromL1Cache); out_port(requestNetwork_out, RequestMsg, requestFromL1Cache); @@ -507,7 +528,11 @@ machine(L1Cache, "Token protocol") { // Mark TBE flag if response received off-chip. Use this to update average latency estimate if ( in_msg.SenderMachine == MachineType:L2Cache ) { - if (in_msg.Sender == map_L1CacheMachId_to_L2Cache(in_msg.Address, machineID)) { + if (in_msg.Sender == mapAddressToRange(in_msg.Address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)) { + // came from an off-chip L2 cache if (L1_TBEs.isPresent(in_msg.Address)) { // L1_TBEs[in_msg.Address].ExternalResponse := true; @@ -523,15 +548,15 @@ machine(L1Cache, "Token protocol") { // profile_memory_response( in_msg.Address); } } else if ( in_msg.SenderMachine == MachineType:L1Cache) { - if (isLocalProcessor(machineID, in_msg.Sender) == false) { - if (L1_TBEs.isPresent(in_msg.Address)) { + //if (isLocalProcessor(machineID, in_msg.Sender) == false) { + //if (L1_TBEs.isPresent(in_msg.Address)) { // L1_TBEs[in_msg.Address].ExternalResponse := true; // profile_offchipL1_response(in_msg.Address ); - } - } - else { + //} + //} + //else { // profile_onchipL1_response(in_msg.Address ); - } + //} } else { error("unexpected SenderMachine"); } @@ -570,42 +595,42 @@ machine(L1Cache, "Token protocol") { // ** INSTRUCTION ACCESS *** // Check to see if it is in the OTHER L1 - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.Address); + trigger(Event:L1_Replacement, in_msg.LineAddress); } - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. 
We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1IcacheMemory.cacheAvail(in_msg.Address)) { + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { // No room in the L1, so we need to make room - trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.LineAddress)); } } } else { // *** DATA ACCESS *** // Check to see if it is in the OTHER L1 - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.Address); + trigger(Event:L1_Replacement, in_msg.LineAddress); } - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1DcacheMemory.cacheAvail(in_msg.Address)) { + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { // No room in the L1, so we need to make room - trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.LineAddress)); } } } @@ -618,19 +643,31 @@ machine(L1Cache, "Token protocol") { action(a_issueReadRequest, "a", desc="Issue GETS") { if (L1_TBEs[address].IssueCount == 0) { // Update outstanding requests - profile_outstanding_request(outstandingRequests); + //profile_outstanding_request(outstandingRequests); outstandingRequests := outstandingRequests + 1; } - if (L1_TBEs[address].IssueCount >= getRetryThreshold() ) { + if (L1_TBEs[address].IssueCount >= retry_threshold) { // Issue a persistent request if possible - if (okToIssueStarving(address) && (starving == false)) { - enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") { + if (okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := PersistentRequestType:GETS_PERSISTENT; out_msg.Requestor := machineID; out_msg.Destination.broadcast(MachineType:L1Cache); - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. 
+ // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Persistent_Control; out_msg.Prefetch := L1_TBEs[address].Prefetch; @@ -640,11 +677,11 @@ machine(L1Cache, "Token protocol") { starving := true; if (L1_TBEs[address].IssueCount == 0) { - profile_persistent_prediction(address, L1_TBEs[address].AccessType); + //profile_persistent_prediction(address, L1_TBEs[address].AccessType); } // Update outstanding requests - profile_outstanding_persistent_request(outstandingPersistentRequests); + //profile_outstanding_persistent_request(outstandingPersistentRequests); outstandingPersistentRequests := outstandingPersistentRequests + 1; // Increment IssueCount @@ -666,11 +703,16 @@ machine(L1Cache, "Token protocol") { } } else { // Make a normal request - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.RetryNum := L1_TBEs[address].IssueCount; if (L1_TBEs[address].IssueCount == 0) { out_msg.MessageSize := MessageSizeType:Request_Control; @@ -682,11 +724,18 @@ machine(L1Cache, "Token protocol") { } // send to other local L1s, with local bit set - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; - out_msg.Destination := getOtherLocalL1IDs(machineID); + + // + // Since only one chip, assuming all L1 caches are local + // + //out_msg.Destination := getOtherLocalL1IDs(machineID); + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.remove(machineID); + out_msg.RetryNum := L1_TBEs[address].IssueCount; out_msg.isLocal := true; if (L1_TBEs[address].IssueCount == 0) { @@ -703,10 +752,10 @@ machine(L1Cache, "Token protocol") { // Set a wakeup timer - if (getDynamicTimeoutEnabled()) { + if (dynamic_timeout_enabled) { reissueTimerTable.set(address, 1.25 * averageLatencyEstimate()); } else { - reissueTimerTable.set(address, getFixedTimeoutLatency()); + reissueTimerTable.set(address, fixed_timeout_latency); } } @@ -716,20 +765,32 @@ machine(L1Cache, "Token protocol") { if (L1_TBEs[address].IssueCount == 0) { // Update outstanding requests - profile_outstanding_request(outstandingRequests); + //profile_outstanding_request(outstandingRequests); outstandingRequests := outstandingRequests + 1; } - if (L1_TBEs[address].IssueCount >= getRetryThreshold() ) { + if (L1_TBEs[address].IssueCount >= retry_threshold) { // Issue a persistent request if possible - if ( okToIssueStarving(address) && (starving == false)) { - enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") { + if ( okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := PersistentRequestType:GETX_PERSISTENT; out_msg.Requestor := machineID; 
out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.broadcast(MachineType:L1Cache); - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Persistent_Control; out_msg.Prefetch := L1_TBEs[address].Prefetch; @@ -739,11 +800,11 @@ machine(L1Cache, "Token protocol") { starving := true; // Update outstanding requests - profile_outstanding_persistent_request(outstandingPersistentRequests); + //profile_outstanding_persistent_request(outstandingPersistentRequests); outstandingPersistentRequests := outstandingPersistentRequests + 1; if (L1_TBEs[address].IssueCount == 0) { - profile_persistent_prediction(address, L1_TBEs[address].AccessType); + //profile_persistent_prediction(address, L1_TBEs[address].AccessType); } // Increment IssueCount @@ -766,12 +827,17 @@ machine(L1Cache, "Token protocol") { } else { // Make a normal request - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:L1Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.RetryNum := L1_TBEs[address].IssueCount; if (L1_TBEs[address].IssueCount == 0) { @@ -784,12 +850,19 @@ machine(L1Cache, "Token protocol") { } // send to other local L1s too - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; out_msg.isLocal := true; - out_msg.Destination := getOtherLocalL1IDs(machineID); + + // + // Since only one chip, assuming all L1 caches are local + // + //out_msg.Destination := getOtherLocalL1IDs(machineID); + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.remove(machineID); + out_msg.RetryNum := L1_TBEs[address].IssueCount; if (L1_TBEs[address].IssueCount == 0) { out_msg.MessageSize := MessageSizeType:Request_Control; @@ -807,10 +880,10 @@ machine(L1Cache, "Token protocol") { DEBUG_EXPR(L1_TBEs[address].IssueCount); // Set a wakeup timer - if (getDynamicTimeoutEnabled()) { + if (dynamic_timeout_enabled) { reissueTimerTable.set(address, 1.25 * averageLatencyEstimate()); } else { - reissueTimerTable.set(address, getFixedTimeoutLatency()); + reissueTimerTable.set(address, fixed_timeout_latency); } } } @@ -818,7 +891,7 @@ machine(L1Cache, "Token protocol") { action(bb_bounceResponse, "\b", desc="Bounce tokens and data to memory") { peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -833,11 +906,16 @@ 
machine(L1Cache, "Token protocol") { } action(c_ownedReplacement, "c", desc="Issue writeback") { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Tokens := getCacheEntry(address).Tokens; out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -853,11 +931,16 @@ machine(L1Cache, "Token protocol") { // don't send writeback if replacing block with no tokens if (getCacheEntry(address).Tokens != 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Tokens := getCacheEntry(address).Tokens; out_msg.DataBlk := getCacheEntry(address).DataBlk; // assert(getCacheEntry(address).Dirty == false); @@ -879,7 +962,7 @@ machine(L1Cache, "Token protocol") { action(d_sendDataWithToken, "d", desc="Send data and a token from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -902,14 +985,14 @@ machine(L1Cache, "Token protocol") { action(d_sendDataWithNTokenIfAvail, "\dd", desc="Send data and a token from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - if (getCacheEntry(address).Tokens > N_tokens()) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + if (getCacheEntry(address).Tokens > N_tokens) { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(in_msg.Requestor); - out_msg.Tokens := N_tokens(); + out_msg.Tokens := N_tokens; out_msg.DataBlk := getCacheEntry(address).DataBlk; // out_msg.Dirty := getCacheEntry(address).Dirty; out_msg.Dirty := false; @@ -919,10 +1002,10 @@ machine(L1Cache, "Token protocol") { out_msg.MessageSize := MessageSizeType:Response_Data; } } - getCacheEntry(address).Tokens := getCacheEntry(address).Tokens - N_tokens(); + getCacheEntry(address).Tokens := getCacheEntry(address).Tokens - N_tokens; } else if (getCacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -946,7 +1029,7 @@ machine(L1Cache, "Token protocol") { action(dd_sendDataWithAllTokens, "\d", desc="Send data and all tokens from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - 
enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -969,7 +1052,7 @@ machine(L1Cache, "Token protocol") { action(e_sendAckWithCollectedTokens, "e", desc="Send ack with the tokens we've collected thus far.") { // assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself if (getCacheEntry(address).Tokens > 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -986,7 +1069,7 @@ machine(L1Cache, "Token protocol") { action(ee_sendDataWithAllTokens, "\e", desc="Send data and all tokens from cache to starver") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getCacheEntry(address).Tokens > 0); - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -1005,23 +1088,23 @@ machine(L1Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getCacheEntry(address).Tokens > 0); if (getCacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(persistentTable.findSmallest(address)); assert(getCacheEntry(address).Tokens >= 1); - if (getCacheEntry(address).Tokens > N_tokens()) { - out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens; } else { out_msg.Tokens := getCacheEntry(address).Tokens - 1; } out_msg.MessageSize := MessageSizeType:Response_Control; } } - if (getCacheEntry(address).Tokens > N_tokens()) { - getCacheEntry(address).Tokens := N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + getCacheEntry(address).Tokens := N_tokens; } else { getCacheEntry(address).Tokens := 1; } @@ -1031,15 +1114,15 @@ machine(L1Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getCacheEntry(address).Tokens > 0); if (getCacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(persistentTable.findSmallest(address)); assert(getCacheEntry(address).Tokens >= 1); - if (getCacheEntry(address).Tokens > N_tokens()) { - out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens; } else { out_msg.Tokens := 
getCacheEntry(address).Tokens - 1; } @@ -1047,8 +1130,8 @@ machine(L1Cache, "Token protocol") { out_msg.Dirty := getCacheEntry(address).Dirty; out_msg.MessageSize := MessageSizeType:Response_Data; } - if (getCacheEntry(address).Tokens > N_tokens()) { - getCacheEntry(address).Tokens := N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + getCacheEntry(address).Tokens := N_tokens; } else { getCacheEntry(address).Tokens := 1; } @@ -1061,7 +1144,7 @@ machine(L1Cache, "Token protocol") { peek(responseNetwork_in, ResponseMsg) { // assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -1079,7 +1162,8 @@ machine(L1Cache, "Token protocol") { action(h_load_hit, "h", desc="Notify sequencer the load completed.") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); - sequencer.readCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + //sequencer.readCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + sequencer.readCallback(address, getCacheEntry(address).DataBlk); } action(x_external_load_hit, "x", desc="Notify sequencer the load completed.") { @@ -1087,14 +1171,16 @@ machine(L1Cache, "Token protocol") { DEBUG_EXPR(getCacheEntry(address).DataBlk); peek(responseNetwork_in, ResponseMsg) { - sequencer.readCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + //sequencer.readCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + sequencer.readCallback(address, getCacheEntry(address).DataBlk); } } action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); - sequencer.writeCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + //sequencer.writeCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + sequencer.writeCallback(address, getCacheEntry(address).DataBlk); getCacheEntry(address).Dirty := true; DEBUG_EXPR(getCacheEntry(address).DataBlk); } @@ -1103,7 +1189,8 @@ machine(L1Cache, "Token protocol") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); peek(responseNetwork_in, ResponseMsg) { - sequencer.writeCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + //sequencer.writeCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + sequencer.writeCallback(address, getCacheEntry(address).DataBlk); } getCacheEntry(address).Dirty := true; DEBUG_EXPR(getCacheEntry(address).DataBlk); @@ -1133,8 +1220,6 @@ machine(L1Cache, "Token protocol") { useTimerTable.unset(address); } - - action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") { mandatoryQueue_in.dequeue(); } @@ -1156,14 +1241,19 @@ machine(L1Cache, "Token protocol") { } action(p_informL2AboutTokenLoss, "p", desc="Inform L2 about loss of all tokens") { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = 
l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:INV; out_msg.Tokens := 0; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.DestMachine := MachineType:L2Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Response_Control; } } @@ -1189,13 +1279,25 @@ machine(L1Cache, "Token protocol") { if (L1_TBEs[address].WentPersistent) { // assert(starving == true); outstandingRequests := outstandingRequests - 1; - enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") { + enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT; out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.broadcast(MachineType:L1Cache); - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Persistent_Control; } @@ -1217,14 +1319,14 @@ machine(L1Cache, "Token protocol") { // profile_token_retry(address, L1_TBEs[address].AccessType, 1); //} - profile_token_retry(address, L1_TBEs[address].AccessType, L1_TBEs[address].IssueCount); + //profile_token_retry(address, L1_TBEs[address].AccessType, L1_TBEs[address].IssueCount); L1_TBEs.deallocate(address); } action(t_sendAckWithCollectedTokens, "t", desc="Send ack with the tokens we've collected thus far.") { if (getCacheEntry(address).Tokens > 0) { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -1259,13 +1361,13 @@ machine(L1Cache, "Token protocol") { action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") { if (L1DcacheMemory.isTagPresent(address) == false) { - L1DcacheMemory.allocate(address); + L1DcacheMemory.allocate(address, new Entry); } } action(pp_allocateL1ICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") { if (L1IcacheMemory.isTagPresent(address) == false) { - L1IcacheMemory.allocate(address); + L1IcacheMemory.allocate(address, new Entry); } } @@ -1281,11 +1383,6 @@ machine(L1Cache, "Token protocol") { } } - - action(z_stall, "z", desc="Stall") { - - } - action(zz_recycleMandatoryQueue, "\z", desc="Send the head of the mandatory queue to the back of the queue.") { mandatoryQueue_in.recycle(); } diff --git a/src/mem/protocol/MOESI_CMP_token-L2cache.sm b/src/mem/protocol/MOESI_CMP_token-L2cache.sm index 0a58ed5cf..9a5c400f2 100644 --- a/src/mem/protocol/MOESI_CMP_token-L2cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L2cache.sm @@ -32,20 +32,33 @@ * */ -machine(L2Cache, "Token protocol") { +machine(L2Cache, "Token protocol") + : int l2_request_latency, + int 
l2_response_latency, + int N_tokens, + bool filtering_enabled +{ // L2 BANK QUEUES // From local bank of L2 cache TO the network - MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="0", ordered="false"; // this L2 bank -> a local L1 - MessageBuffer GlobalRequestFromL2Cache, network="To", virtual_network="1", ordered="false"; // this L2 bank -> mod-directory - MessageBuffer responseFromL2Cache, network="To", virtual_network="2", ordered="false"; // this L2 bank -> a local L1 || mod-directory + + // this L2 bank -> a local L1 || mod-directory + MessageBuffer responseFromL2Cache, network="To", virtual_network="1", ordered="false"; + // this L2 bank -> mod-directory + MessageBuffer GlobalRequestFromL2Cache, network="To", virtual_network="3", ordered="false"; + // this L2 bank -> a local L1 + MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="4", ordered="false"; // FROM the network to this local bank of L2 cache - MessageBuffer L1RequestToL2Cache, network="From", virtual_network="0", ordered="false"; // a local L1 -> this L2 bank - MessageBuffer GlobalRequestToL2Cache, network="From", virtual_network="1", ordered="false"; // mod-directory -> this L2 bank - MessageBuffer responseToL2Cache, network="From", virtual_network="2", ordered="false"; // a local L1 || mod-directory -> this L2 bank - MessageBuffer persistentToL2Cache, network="From", virtual_network="3", ordered="true"; + + // a local L1 || mod-directory -> this L2 bank + MessageBuffer responseToL2Cache, network="From", virtual_network="1", ordered="false"; + MessageBuffer persistentToL2Cache, network="From", virtual_network="2", ordered="true"; + // mod-directory -> this L2 bank + MessageBuffer GlobalRequestToL2Cache, network="From", virtual_network="3", ordered="false"; + // a local L1 -> this L2 bank + MessageBuffer L1RequestToL2Cache, network="From", virtual_network="4", ordered="false"; // STATES enumeration(State, desc="L2 Cache states", default="L2Cache_State_I") { @@ -107,8 +120,6 @@ machine(L2Cache, "Token protocol") { DataBlock DataBlk, desc="data for the block"; } - - structure(DirEntry, desc="...") { Set Sharers, desc="Set of the internal processors that want the block in shared state"; bool exclusive, default="false", desc="if local exclusive is likely"; @@ -117,7 +128,7 @@ machine(L2Cache, "Token protocol") { external_type(CacheMemory) { bool cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); @@ -132,19 +143,28 @@ machine(L2Cache, "Token protocol") { bool isTagPresent(Address); } + external_type(PersistentTable) { + void persistentRequestLock(Address, MachineID, AccessType); + void persistentRequestUnlock(Address, MachineID); + MachineID findSmallest(Address); + AccessType typeOfSmallest(Address); + void markEntries(Address); + bool isLocked(Address); + int countStarvingForAddress(Address); + int countReadStarvingForAddress(Address); + } - CacheMemory L2cacheMemory, template_hack="<L2Cache_Entry>", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,MachineType_L2Cache,int_to_string(i)+"_L2"'; + CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])'; - PersistentTable persistentTable, constructor_hack="i"; + PersistentTable persistentTable; PerfectCacheMemory localDirectory, template_hack="<L2Cache_DirEntry>"; - - bool getFilteringEnabled(); - Entry getL2CacheEntry(Address addr), return_by_ref="yes" { if 
(L2cacheMemory.isTagPresent(addr)) { return L2cacheMemory[addr]; } + assert(false); + return L2cacheMemory[addr]; } int getTokens(Address addr) { @@ -465,15 +485,21 @@ machine(L2Cache, "Token protocol") { // if this is a retry or no local sharers, broadcast normally // if (in_msg.RetryNum > 0 || (in_msg.Type == CoherenceRequestType:GETX && exclusiveExists(in_msg.Address) == false) || (in_msg.Type == CoherenceRequestType:GETS && sharersExist(in_msg.Address) == false)) { - enqueue(globalRequestNetwork_out, RequestMsg, latency="L2_REQUEST_LATENCY") { + enqueue(globalRequestNetwork_out, RequestMsg, latency=l2_request_latency) { out_msg.Address := in_msg.Address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; out_msg.RequestorMachine := in_msg.RequestorMachine; - //out_msg.Destination.broadcast(MachineType:L2Cache); out_msg.RetryNum := in_msg.RetryNum; - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); - out_msg.Destination.remove(map_L1CacheMachId_to_L2Cache(address, in_msg.Requestor)); + + // + // If a statically shared L2 cache, then no other L2 caches can + // store the block + // + //out_msg.Destination.broadcast(MachineType:L2Cache); + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + //out_msg.Destination.remove(map_L1CacheMachId_to_L2Cache(address, in_msg.Requestor)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.AccessMode := in_msg.AccessMode; @@ -489,7 +515,7 @@ machine(L2Cache, "Token protocol") { action(bb_bounceResponse, "\b", desc="Bounce tokens and data to memory") { peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -505,7 +531,7 @@ machine(L2Cache, "Token protocol") { action(c_cleanReplacement, "c", desc="Issue clean writeback") { if (getL2CacheEntry(address).Tokens > 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -519,7 +545,7 @@ machine(L2Cache, "Token protocol") { } action(cc_dirtyReplacement, "\c", desc="Issue dirty writeback") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L2Cache; @@ -541,22 +567,22 @@ machine(L2Cache, "Token protocol") { action(d_sendDataWithTokens, "d", desc="Send data and a token from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - if (getL2CacheEntry(address).Tokens > N_tokens()) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + if (getL2CacheEntry(address).Tokens > N_tokens) { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L2Cache; out_msg.Destination.add(in_msg.Requestor); - out_msg.Tokens := N_tokens(); + out_msg.Tokens := N_tokens; out_msg.DataBlk := getL2CacheEntry(address).DataBlk; out_msg.Dirty := false; out_msg.MessageSize := 
MessageSizeType:Response_Data; } - getL2CacheEntry(address).Tokens := getL2CacheEntry(address).Tokens - N_tokens(); + getL2CacheEntry(address).Tokens := getL2CacheEntry(address).Tokens - N_tokens; } else { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -574,7 +600,7 @@ machine(L2Cache, "Token protocol") { action(dd_sendDataWithAllTokens, "\d", desc="Send data and all tokens from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -592,7 +618,7 @@ machine(L2Cache, "Token protocol") { action(e_sendAckWithCollectedTokens, "e", desc="Send ack with the tokens we've collected thus far.") { if (getL2CacheEntry(address).Tokens > 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -607,7 +633,7 @@ machine(L2Cache, "Token protocol") { } action(ee_sendDataWithAllTokens, "\e", desc="Send data and all tokens from cache to starver") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -626,7 +652,7 @@ machine(L2Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getL2CacheEntry(address).Tokens > 0); if (getL2CacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -644,7 +670,7 @@ machine(L2Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getL2CacheEntry(address).Tokens > 0); if (getL2CacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -666,7 +692,7 @@ machine(L2Cache, "Token protocol") { // assert(persistentTable.isLocked(address)); peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -684,7 +710,7 @@ machine(L2Cache, "Token protocol") { //assert(persistentTable.isLocked(address)); peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; if 
(in_msg.Type == CoherenceResponseType:WB_SHARED_DATA) { out_msg.Type := CoherenceResponseType:DATA_SHARED; @@ -706,7 +732,7 @@ machine(L2Cache, "Token protocol") { // assert(persistentTable.isLocked(address)); peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -729,24 +755,31 @@ machine(L2Cache, "Token protocol") { action(j_forwardTransientRequestToLocalSharers, "j", desc="Forward external transient request to local sharers") { peek(requestNetwork_in, RequestMsg) { - if (getFilteringEnabled() == true && in_msg.RetryNum == 0 && sharersExist(in_msg.Address) == false) { - profile_filter_action(1); + if (filtering_enabled == true && in_msg.RetryNum == 0 && sharersExist(in_msg.Address) == false) { + //profile_filter_action(1); DEBUG_EXPR("filtered message"); DEBUG_EXPR(in_msg.RetryNum); } else { - enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) { + enqueue(localRequestNetwork_out, RequestMsg, latency=l2_response_latency ) { out_msg.Address := in_msg.Address; out_msg.Requestor := in_msg.Requestor; out_msg.RequestorMachine := in_msg.RequestorMachine; - out_msg.Destination := getLocalL1IDs(machineID); + + // + // Currently assuming only one chip so all L1s are local + // + //out_msg.Destination := getLocalL1IDs(machineID); + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.remove(in_msg.Requestor); + out_msg.Type := in_msg.Type; out_msg.isLocal := false; out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.AccessMode := in_msg.AccessMode; out_msg.Prefetch := in_msg.Prefetch; } - profile_filter_action(0); + //profile_filter_action(0); } } } @@ -756,7 +789,7 @@ machine(L2Cache, "Token protocol") { peek(L1requestNetwork_in, RequestMsg) { assert(getL2CacheEntry(address).Tokens > 0); //enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_to_L1_RESPONSE_LATENCY") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -774,7 +807,7 @@ machine(L2Cache, "Token protocol") { action(k_dataOwnerFromL2CacheToL1Requestor, "\k", desc="Send data and a token from cache to L1 requestor") { peek(L1requestNetwork_in, RequestMsg) { assert(getL2CacheEntry(address).Tokens > 0); - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -793,7 +826,7 @@ machine(L2Cache, "Token protocol") { peek(L1requestNetwork_in, RequestMsg) { // assert(getL2CacheEntry(address).Tokens == max_tokens()); //enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_to_L1_RESPONSE_LATENCY") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -840,12 +873,13 @@ machine(L2Cache, "Token protocol") { } action(r_markNewSharer, "r", desc="Mark the new local sharer from local request 
message") { - peek(L1requestNetwork_in, RequestMsg) { - if (in_msg.Type == CoherenceRequestType:GETX) { - setNewWriter(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); - } else if (in_msg.Type == CoherenceRequestType:GETS) { - addNewSharer(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); + if (machineIDToMachineType(in_msg.Requestor) == MachineType:L1Cache) { + if (in_msg.Type == CoherenceRequestType:GETX) { + setNewWriter(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); + } else if (in_msg.Type == CoherenceRequestType:GETS) { + addNewSharer(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); + } } } } @@ -854,16 +888,19 @@ machine(L2Cache, "Token protocol") { clearExclusiveBitIfExists(address); } - action( r_setMRU, "\rr", desc="manually set the MRU bit for cache line" ) { - if(isCacheTagPresent(address)) { - L2cacheMemory.setMRU(address); + action(r_setMRU, "\rr", desc="manually set the MRU bit for cache line" ) { + peek(L1requestNetwork_in, RequestMsg) { + if ((machineIDToMachineType(in_msg.Requestor) == MachineType:L1Cache) && + (isCacheTagPresent(address))) { + L2cacheMemory.setMRU(address); + } } } action(t_sendAckWithCollectedTokens, "t", desc="Send ack with the tokens we've collected thus far.") { if (getL2CacheEntry(address).Tokens > 0) { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -881,7 +918,7 @@ machine(L2Cache, "Token protocol") { action(tt_sendLocalAckWithCollectedTokens, "tt", desc="Send ack with the tokens we've collected thus far.") { if (getL2CacheEntry(address).Tokens > 0) { peek(L1requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -906,19 +943,19 @@ machine(L2Cache, "Token protocol") { } action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") { - L2cacheMemory.allocate(address); + L2cacheMemory.allocate(address, new Entry); } action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. 
Sets the cache to not present, allowing a replacement in parallel with a fetch.") { L2cacheMemory.deallocate(address); } - action(uu_profileMiss, "\u", desc="Profile the demand miss") { - peek(L1requestNetwork_in, RequestMsg) { + //action(uu_profileMiss, "\u", desc="Profile the demand miss") { + // peek(L1requestNetwork_in, RequestMsg) { // AccessModeType not implemented //profile_L2Cache_miss(convertToGenericType(in_msg.Type), in_msg.AccessMode, MessageSizeTypeToInt(in_msg.MessageSize), in_msg.Prefetch, machineIDToNodeID(in_msg.Requestor)); - } - } + // } + //} action(w_assertIncomingDataAndCacheDataMatch, "w", desc="Assert that the incoming data and the data in the cache match") { @@ -927,11 +964,6 @@ machine(L2Cache, "Token protocol") { } } - action(z_stall, "z", desc="Stall") { - } - - - //***************************************************** // TRANSITIONS @@ -961,7 +993,7 @@ machine(L2Cache, "Token protocol") { transition(NP, {L1_GETS, L1_GETX}) { a_broadcastLocalRequest; r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1012,7 +1044,7 @@ machine(L2Cache, "Token protocol") { a_broadcastLocalRequest; tt_sendLocalAckWithCollectedTokens; // send any tokens we have collected r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1020,7 +1052,7 @@ machine(L2Cache, "Token protocol") { a_broadcastLocalRequest; tt_sendLocalAckWithCollectedTokens; // send any tokens we have collected r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1181,7 +1213,7 @@ machine(L2Cache, "Token protocol") { tt_sendLocalAckWithCollectedTokens; r_markNewSharer; r_setMRU; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1294,7 +1326,7 @@ machine(L2Cache, "Token protocol") { k_dataAndAllTokensFromL2CacheToL1Requestor; r_markNewSharer; r_setMRU; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1382,7 +1414,7 @@ machine(L2Cache, "Token protocol") { transition(I_L, {L1_GETX, L1_GETS}) { a_broadcastLocalRequest; r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1391,7 +1423,7 @@ machine(L2Cache, "Token protocol") { tt_sendLocalAckWithCollectedTokens; r_markNewSharer; r_setMRU; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } diff --git a/src/mem/protocol/MOESI_CMP_token-dir.sm b/src/mem/protocol/MOESI_CMP_token-dir.sm index 1592fd123..7925a8fe0 100644 --- a/src/mem/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/protocol/MOESI_CMP_token-dir.sm @@ -32,14 +32,23 @@ */ -machine(Directory, "Token protocol") { - - MessageBuffer requestFromDir, network="To", virtual_network="1", ordered="false"; - MessageBuffer responseFromDir, network="To", virtual_network="2", ordered="false"; - - MessageBuffer persistentToDir, network="From", virtual_network="3", ordered="true"; - MessageBuffer requestToDir, network="From", virtual_network="1", ordered="false"; - MessageBuffer responseToDir, network="From", virtual_network="2", ordered="false"; +machine(Directory, "Token protocol") + : int directory_latency, + int l2_select_low_bit, + int l2_select_num_bits, + bool distributed_persistent, + int fixed_timeout_latency +{ + + MessageBuffer dmaResponseFromDir, network="To", virtual_network="0", ordered="true"; + MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; + MessageBuffer persistentFromDir, network="To", virtual_network="2", ordered="true"; + MessageBuffer requestFromDir, network="To", virtual_network="4", ordered="false"; + + MessageBuffer 
responseToDir, network="From", virtual_network="1", ordered="false"; + MessageBuffer persistentToDir, network="From", virtual_network="2", ordered="true"; + MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; + MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; // STATES enumeration(State, desc="Directory states", default="Directory_State_O") { @@ -47,6 +56,24 @@ machine(Directory, "Token protocol") { O, desc="Owner"; NO, desc="Not Owner"; L, desc="Locked"; + + // Memory wait states - can block all messages including persistent requests + O_W, desc="transitioning to Owner, waiting for memory write"; + L_W, desc="transitioning to Locked, waiting for memory read"; + DR_L_W, desc="transitioning to Locked underneath a DMA read, waiting for memory data"; + NO_W, desc="transitioning to Not Owner, waiting for memory read"; + O_DW_W, desc="transitioning to Owner, waiting for memory before DMA ack"; + O_DR_W, desc="transitioning to Owner, waiting for memory before DMA data"; + + // DMA request transient states - must respond to persistent requests + O_DW, desc="issued GETX for DMA write, waiting for all tokens"; + NO_DW, desc="issued GETX for DMA write, waiting for all tokens"; + NO_DR, desc="issued GETS for DMA read, waiting for data"; + + // DMA request in progress - competing with a CPU persistent request + DW_L, desc="issued GETX for DMA write, CPU persistent request must complete first"; + DR_L, desc="issued GETS for DMA read, CPU persistent request must complete first"; + } // Events @@ -55,9 +82,23 @@ machine(Directory, "Token protocol") { GETS, desc="A GETS arrives"; Lockdown, desc="A lockdown request arrives"; Unlockdown, desc="An un-lockdown request arrives"; + Own_Lock_or_Unlock, desc="own lock or unlock"; Data_Owner, desc="Data arrive"; + Data_All_Tokens, desc="Data and all tokens"; Ack_Owner, desc="Owner token arrived without data because it was clean"; + Ack_Owner_All_Tokens, desc="All tokens including owner arrived without data because it was clean"; Tokens, desc="Tokens arrive"; + Ack_All_Tokens, desc="All_Tokens arrive"; + Request_Timeout, desc="A DMA request has timed out"; + + // Memory Controller + Memory_Data, desc="Fetched data from memory arrives"; + Memory_Ack, desc="Writeback Ack from memory arrives"; + + // DMA requests + DMA_READ, desc="A DMA Read memory request"; + DMA_WRITE, desc="A DMA Write memory request"; + DMA_WRITE_All_Tokens, desc="A DMA Write memory request, directory has all tokens"; } // TYPES @@ -73,7 +114,7 @@ machine(Directory, "Token protocol") { // is 'soft state' that does not need to be correct (as long as // you're eventually willing to resort to broadcast.) - Set Owner, desc="Probable Owner of the line. More accurately, the set of processors who need to see a GetS or GetO. We use a Set for convenience, but only one bit is set at a time."; + Set Owner, desc="Probable Owner of the line. More accurately, the set of processors who need to see a GetS or GetO. We use a Set for convenience, but only one bit is set at a time."; Set Sharers, desc="Probable sharers of the line. 
More accurately, the set of processors who need to see a GetX";
 }
@@ -82,23 +123,70 @@ machine(Directory, "Token protocol") {
   bool isPresent(Address);
 }
+  external_type(MemoryControl, inport="yes", outport="yes") {
+
+  }
+
+  external_type(PersistentTable) {
+    void persistentRequestLock(Address, MachineID, AccessType);
+    void persistentRequestUnlock(Address, MachineID);
+    bool okToIssueStarving(Address, MachineID);
+    MachineID findSmallest(Address);
+    AccessType typeOfSmallest(Address);
+    void markEntries(Address);
+    bool isLocked(Address);
+    int countStarvingForAddress(Address);
+    int countReadStarvingForAddress(Address);
+  }
+
+  // TBE entries for DMA requests
+  structure(TBE, desc="TBE entries for outstanding DMA requests") {
+    Address PhysicalAddress, desc="physical address";
+    State TBEState, desc="Transient State";
+    DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to be merged with system memory";
+    DataBlock DataBlk, desc="The current view of system memory";
+    int Len, desc="...";
+    MachineID DmaRequestor, desc="DMA requestor";
+    bool WentPersistent, desc="Did the DMA request require a persistent request";
+  }
+
+  external_type(TBETable) {
+    TBE lookup(Address);
+    void allocate(Address);
+    void deallocate(Address);
+    bool isPresent(Address);
+  }
   // ** OBJECTS **
-  DirectoryMemory directory, constructor_hack="i";
+  DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])';
+
+  MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])';
-  PersistentTable persistentTable, constructor_hack="i";
+  PersistentTable persistentTable;
+  TimerTable reissueTimerTable;
+
+  TBETable TBEs, template_hack="<Directory_TBE>";
+
+  bool starving, default="false";
   State getState(Address addr) {
-    return directory[addr].DirectoryState;
+    if (TBEs.isPresent(addr)) {
+      return TBEs[addr].TBEState;
+    } else {
+      return directory[addr].DirectoryState;
+    }
   }
   void setState(Address addr, State state) {
+    if (TBEs.isPresent(addr)) {
+      TBEs[addr].TBEState := state;
+    }
     directory[addr].DirectoryState := state;
     if (state == State:L) {
       assert(directory[addr].Tokens == 0);
-    }
+    }
     // We have one or zero owners
     assert((directory[addr].Owner.count() == 0) || (directory[addr].Owner.count() == 1));
@@ -112,19 +200,90 @@ machine(Directory, "Token protocol") {
       // assert(directory[addr].Tokens >= (max_tokens() / 2)); // Only mostly true; this might not always hold
     }
   }
+
+  bool okToIssueStarving(Address addr, MachineID machineID) {
+    return persistentTable.okToIssueStarving(addr, machineID);
+  }
+
+  void markPersistentEntries(Address addr) {
+    persistentTable.markEntries(addr);
+  }
   // ** OUT_PORTS **
   out_port(responseNetwork_out, ResponseMsg, responseFromDir);
+  out_port(persistentNetwork_out, PersistentMsg, persistentFromDir);
   out_port(requestNetwork_out, RequestMsg, requestFromDir);
+  out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaResponseFromDir);
+
+  //
+  // Memory buffer for memory controller to DIMM communication
+  //
+  out_port(memQueue_out, MemoryMsg, memBuffer);
   // ** IN_PORTS **
+
+  // off-chip memory request/response is done
+  in_port(memQueue_in, MemoryMsg, memBuffer) {
+    if (memQueue_in.isReady()) {
+      peek(memQueue_in, MemoryMsg) {
+        if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
+          trigger(Event:Memory_Data, in_msg.Address);
+        } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
+          trigger(Event:Memory_Ack, in_msg.Address);
+        } else {
+          DEBUG_EXPR(in_msg.Type);
+          error("Invalid message");
+        }
+      }
+    }
+  }
+
+  //
Reissue Timer + in_port(reissueTimerTable_in, Address, reissueTimerTable) { + if (reissueTimerTable_in.isReady()) { + trigger(Event:Request_Timeout, reissueTimerTable.readyAddress()); + } + } + + in_port(responseNetwork_in, ResponseMsg, responseToDir) { + if (responseNetwork_in.isReady()) { + peek(responseNetwork_in, ResponseMsg) { + assert(in_msg.Destination.isElement(machineID)); + if (directory[in_msg.Address].Tokens + in_msg.Tokens == max_tokens()) { + if ((in_msg.Type == CoherenceResponseType:DATA_OWNER) || + (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { + trigger(Event:Data_All_Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { + trigger(Event:Ack_Owner_All_Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:Ack_All_Tokens, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } else { + if (in_msg.Type == CoherenceResponseType:DATA_OWNER) { + trigger(Event:Data_Owner, in_msg.Address); + } else if ((in_msg.Type == CoherenceResponseType:ACK) || + (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { + trigger(Event:Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { + trigger(Event:Ack_Owner, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } + } + } + } in_port(persistentNetwork_in, PersistentMsg, persistentToDir) { if (persistentNetwork_in.isReady()) { peek(persistentNetwork_in, PersistentMsg) { assert(in_msg.Destination.isElement(machineID)); - if (distributedPersistentEnabled()) { + if (distributed_persistent) { // Apply the lockdown or unlockdown message to the table if (in_msg.Type == PersistentRequestType:GETX_PERSISTENT) { persistentTable.persistentRequestLock(in_msg.Address, in_msg.Requestor, AccessType:Write); @@ -173,19 +332,18 @@ machine(Directory, "Token protocol") { } } - in_port(responseNetwork_in, ResponseMsg, responseToDir) { - if (responseNetwork_in.isReady()) { - peek(responseNetwork_in, ResponseMsg) { - assert(in_msg.Destination.isElement(machineID)); - if (in_msg.Type == CoherenceResponseType:DATA_OWNER) { - trigger(Event:Data_Owner, in_msg.Address); - } else if ((in_msg.Type == CoherenceResponseType:ACK) || - (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { - trigger(Event:Tokens, in_msg.Address); - } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { - trigger(Event:Ack_Owner, in_msg.Address); + in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, DMARequestMsg) { + if (in_msg.Type == DMARequestType:READ) { + trigger(Event:DMA_READ, in_msg.LineAddress); + } else if (in_msg.Type == DMARequestType:WRITE) { + if (directory[in_msg.LineAddress].Tokens == max_tokens()) { + trigger(Event:DMA_WRITE_All_Tokens, in_msg.LineAddress); + } else { + trigger(Event:DMA_WRITE, in_msg.LineAddress); + } } else { - DEBUG_EXPR(in_msg.Type); error("Invalid message"); } } @@ -199,7 +357,7 @@ machine(Directory, "Token protocol") { if (directory[address].Tokens > 0) { peek(requestNetwork_in, RequestMsg) { // enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_CACHE_LATENCY") {// FIXME? - enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_LATENCY") {// FIXME? + enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {// FIXME? 
out_msg.Address := address;
        out_msg.Type := CoherenceResponseType:ACK;
        out_msg.Sender := machineID;
@@ -213,11 +371,151 @@ machine(Directory, "Token protocol") {
     }
   }
+  action(px_tryIssuingPersistentGETXRequest, "px", desc="Try issuing a persistent GETX request") {
+    if (okToIssueStarving(address, machineID) && (starving == false)) {
+      enqueue(persistentNetwork_out, PersistentMsg, latency = "1") {
+        out_msg.Address := address;
+        out_msg.Type := PersistentRequestType:GETX_PERSISTENT;
+        out_msg.Requestor := machineID;
+        out_msg.RequestorMachine := MachineType:Directory;
+        out_msg.Destination.broadcast(MachineType:L1Cache);
+
+        //
+        // Currently the configuration system limits the system to only one
+        // chip. Therefore, if we assume one shared L2 cache, then only one
+        // pertinent L2 cache exists.
+        //
+        //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+        out_msg.Destination.add(mapAddressToRange(address,
+                                MachineType:L2Cache,
+                                l2_select_low_bit,
+                                l2_select_num_bits));
+
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.MessageSize := MessageSizeType:Persistent_Control;
+        out_msg.Prefetch := PrefetchBit:No;
+        out_msg.AccessMode := AccessModeType:SupervisorMode;
+      }
+      markPersistentEntries(address);
+      starving := true;
+
+      TBEs[address].WentPersistent := true;
+
+      // Do not schedule a wakeup, a persistent request will always complete
+    } else {
+
+      // We'd like to issue a persistent request, but are not allowed
+      // to issue a P.R. right now. Thus, we do not increment the
+      // IssueCount.
+
+      // Set a wakeup timer
+      reissueTimerTable.set(address, 10);
+    }
+  }
+
+  action(bw_broadcastWrite, "bw", desc="Broadcast GETX if we need tokens") {
+    peek(dmaRequestQueue_in, DMARequestMsg) {
+      //
+      // Assert that we only send a message if we don't already have all the tokens
+      //
+      assert(directory[address].Tokens != max_tokens());
+      enqueue(requestNetwork_out, RequestMsg, latency = "1") {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceRequestType:GETX;
+        out_msg.Requestor := machineID;
+        out_msg.RequestorMachine := MachineType:Directory;
+
+        //
+        // Since only one chip, assuming all L1 caches are local
+        //
+        out_msg.Destination.broadcast(MachineType:L1Cache);
+        out_msg.Destination.add(mapAddressToRange(address,
+                                MachineType:L2Cache,
+                                l2_select_low_bit,
+                                l2_select_num_bits));
+
+        out_msg.RetryNum := 0;
+        out_msg.MessageSize := MessageSizeType:Request_Control;
+        out_msg.Prefetch := PrefetchBit:No;
+        out_msg.AccessMode := AccessModeType:SupervisorMode;
+      }
+    }
+  }
+
+  action(ps_tryIssuingPersistentGETSRequest, "ps", desc="Try issuing a persistent GETS request") {
+    if (okToIssueStarving(address, machineID) && (starving == false)) {
+      enqueue(persistentNetwork_out, PersistentMsg, latency = "1") {
+        out_msg.Address := address;
+        out_msg.Type := PersistentRequestType:GETS_PERSISTENT;
+        out_msg.Requestor := machineID;
+        out_msg.RequestorMachine := MachineType:Directory;
+        out_msg.Destination.broadcast(MachineType:L1Cache);
+
+        //
+        // Currently the configuration system limits the system to only one
+        // chip. Therefore, if we assume one shared L2 cache, then only one
+        // pertinent L2 cache exists.
+        //
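Both px_tryIssuingPersistentGETXRequest and ps_tryIssuingPersistentGETSRequest follow the same try-or-back-off shape: escalate to a persistent request only if the persistent table's arbitration allows it and this directory is not already starving, otherwise arm the reissue timer so Request_Timeout retries later. A minimal standalone C++ sketch of that control flow; PersistentTable and ReissueTimer here are simplified stand-ins for the SLICC interfaces, not gem5 code:

    #include <cstdint>

    // Simplified stand-ins for the SLICC PersistentTable and TimerTable hooks.
    struct PersistentTable {
        bool locked = false;
        bool okToIssueStarving(uint64_t addr, int requestor) const { return !locked; }
        void markEntries(uint64_t addr) {}
    };

    struct ReissueTimer {
        void set(uint64_t addr, int cycles) { /* schedule a Request_Timeout */ }
    };

    // Escalate to a persistent request if allowed; otherwise back off and retry.
    void tryIssuePersistent(PersistentTable& table, ReissueTimer& timer, uint64_t addr,
                            int self, bool& starving, bool& wentPersistent) {
        if (table.okToIssueStarving(addr, self) && !starving) {
            // ...broadcast GETX_PERSISTENT / GETS_PERSISTENT here...
            table.markEntries(addr);
            starving = true;
            wentPersistent = true;  // kept in the TBE so s_deallocateTBE can deactivate it
        } else {
            timer.set(addr, 10);    // mirrors the fixed 10-cycle retry used above
        }
    }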
+        //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+        out_msg.Destination.add(mapAddressToRange(address,
+                                MachineType:L2Cache,
+                                l2_select_low_bit,
+                                l2_select_num_bits));
+
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.MessageSize := MessageSizeType:Persistent_Control;
+        out_msg.Prefetch := PrefetchBit:No;
+        out_msg.AccessMode := AccessModeType:SupervisorMode;
+      }
+      markPersistentEntries(address);
+      starving := true;
+
+      TBEs[address].WentPersistent := true;
+
+      // Do not schedule a wakeup, a persistent request will always complete
+    } else {
+
+      // We'd like to issue a persistent request, but are not allowed
+      // to issue a P.R. right now. Thus, we do not increment the
+      // IssueCount.
+
+      // Set a wakeup timer
+      reissueTimerTable.set(address, 10);
+    }
+  }
+
+  action(br_broadcastRead, "br", desc="Broadcast GETS for data") {
+    peek(dmaRequestQueue_in, DMARequestMsg) {
+      enqueue(requestNetwork_out, RequestMsg, latency = "1") {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceRequestType:GETS;
+        out_msg.Requestor := machineID;
+        out_msg.RequestorMachine := MachineType:Directory;
+
+        //
+        // Since only one chip, assuming all L1 caches are local
+        //
+        out_msg.Destination.broadcast(MachineType:L1Cache);
+        out_msg.Destination.add(mapAddressToRange(address,
+                                MachineType:L2Cache,
+                                l2_select_low_bit,
+                                l2_select_num_bits));
+
+        out_msg.RetryNum := 0;
+        out_msg.MessageSize := MessageSizeType:Request_Control;
+        out_msg.Prefetch := PrefetchBit:No;
+        out_msg.AccessMode := AccessModeType:SupervisorMode;
+      }
+    }
+  }
+
   action(aa_sendTokensToStarver, "\a", desc="Send tokens to starver") {
     // Only send a message if we have tokens to send
     if (directory[address].Tokens > 0) {
//      enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_CACHE_LATENCY") {// FIXME?
-      enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_LATENCY") {// FIXME?
+      enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {// FIXME?
out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -230,14 +528,14 @@ machine(Directory, "Token protocol") { } } - action(d_sendDataWithAllTokens, "d", desc="Send data and tokens to requestor") { - peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { + action(d_sendMemoryDataWithAllTokens, "d", desc="Send data and tokens to requestor") { + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:Directory; - out_msg.Destination.add(in_msg.Requestor); + out_msg.Destination.add(in_msg.OriginalRequestorMachId); assert(directory[address].Tokens > 0); out_msg.Tokens := directory[in_msg.Address].Tokens; out_msg.DataBlk := directory[in_msg.Address].DataBlk; @@ -249,21 +547,140 @@ machine(Directory, "Token protocol") { } action(dd_sendDataWithAllTokensToStarver, "\d", desc="Send data and tokens to starver") { - enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { - out_msg.Address := address; - out_msg.Type := CoherenceResponseType:DATA_OWNER; - out_msg.Sender := machineID; - out_msg.SenderMachine := MachineType:Directory; - out_msg.Destination.add(persistentTable.findSmallest(address)); - assert(directory[address].Tokens > 0); - out_msg.Tokens := directory[address].Tokens; - out_msg.DataBlk := directory[address].DataBlk; - out_msg.Dirty := false; - out_msg.MessageSize := MessageSizeType:Response_Data; + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA_OWNER; + out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:Directory; + out_msg.Destination.add(persistentTable.findSmallest(address)); + assert(directory[address].Tokens > 0); + out_msg.Tokens := directory[address].Tokens; + out_msg.DataBlk := directory[address].DataBlk; + out_msg.Dirty := false; + out_msg.MessageSize := MessageSizeType:Response_Data; + } } directory[address].Tokens := 0; } + action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { + peek(requestNetwork_in, RequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } + + action(fd_memoryDma, "fd", desc="Queue off-chip fetch request") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } + + action(lq_queueMemoryWbRequest, "lq", desc="Write data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + DEBUG_EXPR(out_msg); + } + } + + action(ld_queueMemoryDmaWriteFromTbe, "ld", desc="Write DMA data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := 
MemoryRequestType:MEMORY_WB;
+      // first, initialize the data blk to the current version of system memory
+      out_msg.DataBlk := TBEs[address].DataBlk;
+      // then add the dma write data
+      out_msg.DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
+      DEBUG_EXPR(out_msg);
+    }
+  }
+
+  action(lr_queueMemoryDmaReadWriteback, "lr", desc="Write DMA data from read to memory") {
+    enqueue(memQueue_out, MemoryMsg, latency="1") {
+      out_msg.Address := address;
+      out_msg.Type := MemoryRequestType:MEMORY_WB;
+      // first, initialize the data blk to the current version of system memory
+      out_msg.DataBlk := TBEs[address].DataBlk;
+      DEBUG_EXPR(out_msg);
+    }
+  }
+
+  action(vd_allocateDmaRequestInTBE, "vd", desc="Record Data in TBE") {
+    peek(dmaRequestQueue_in, DMARequestMsg) {
+      TBEs.allocate(address);
+      TBEs[address].DmaDataBlk := in_msg.DataBlk;
+      TBEs[address].PhysicalAddress := in_msg.PhysicalAddress;
+      TBEs[address].Len := in_msg.Len;
+      TBEs[address].DmaRequestor := in_msg.Requestor;
+      TBEs[address].WentPersistent := false;
+    }
+  }
+
+  action(s_deallocateTBE, "s", desc="Deallocate TBE") {
+
+    if (TBEs[address].WentPersistent) {
+      assert(starving == true);
+
+      enqueue(persistentNetwork_out, PersistentMsg, latency = "1") {
+        out_msg.Address := address;
+        out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT;
+        out_msg.Requestor := machineID;
+        out_msg.RequestorMachine := MachineType:Directory;
+        out_msg.Destination.broadcast(MachineType:L1Cache);
+
+        //
+        // Currently the configuration system limits the system to only one
+        // chip. Therefore, if we assume one shared L2 cache, then only one
+        // pertinent L2 cache exists.
+        //
+        //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+        out_msg.Destination.add(mapAddressToRange(address,
+                                MachineType:L2Cache,
+                                l2_select_low_bit,
+                                l2_select_num_bits));
+
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.MessageSize := MessageSizeType:Persistent_Control;
+      }
+      starving := false;
+    }
+
+    TBEs.deallocate(address);
+  }
+
+  action(rd_recordDataInTbe, "rd", desc="Record data in TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      TBEs[address].DataBlk := in_msg.DataBlk;
+    }
+  }
+
+  action(cd_writeCleanDataToTbe, "cd", desc="Write clean memory data to TBE") {
+    TBEs[address].DataBlk := directory[address].DataBlk;
+  }
+
+  action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") {
+    directory[address].DataBlk := TBEs[address].DataBlk;
+    directory[address].DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
+  }
+
   action(f_incrementTokens, "f", desc="Increment the number of tokens we're tracking") {
     peek(responseNetwork_in, ResponseMsg) {
       assert(in_msg.Tokens >= 1);
@@ -275,14 +692,34 @@ machine(Directory, "Token protocol") {
     requestNetwork_in.dequeue();
   }
+  action(z_recycleRequest, "z", desc="Recycle the request queue") {
+    requestNetwork_in.recycle();
+  }
+
   action(k_popIncomingResponseQueue, "k", desc="Pop incoming response queue") {
     responseNetwork_in.dequeue();
   }
+  action(kz_recycleResponse, "kz", desc="Recycle incoming response queue") {
+    responseNetwork_in.recycle();
+  }
+
   action(l_popIncomingPersistentQueue, "l", desc="Pop incoming persistent queue") {
     persistentNetwork_in.dequeue();
  }
+  action(p_popDmaRequestQueue, "pd", desc="pop dma request queue") {
+    dmaRequestQueue_in.dequeue();
+  }
+
+  action(y_recycleDmaRequestQueue, "y", desc="recycle dma request queue") {
+
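The copyPartial call sites above (ld_queueMemoryDmaWriteFromTbe and dwt_writeDmaDataFromTBE) are a read-modify-write of a single line: start from the TBE's snapshot of system memory, then overlay Len bytes of DMA payload at the request's intra-block offset. A self-contained C++ sketch of that merge, assuming a hypothetical 64-byte block; it mimics what DataBlock::copyPartial presumably does rather than reproducing it:

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t kBlockSize = 64;  // assumed line size
    using DataBlock = std::array<std::uint8_t, kBlockSize>;

    // Overlay len bytes of DMA data on the current memory image of the line,
    // starting at the DMA request's offset within the block.
    DataBlock mergeDmaWrite(DataBlock memoryImage, const std::uint8_t* dmaData,
                            std::size_t offset, std::size_t len) {
        // first, start from the current version of system memory (TBE.DataBlk),
        // then add the DMA write data (TBE.DmaDataBlk) on top
        std::copy_n(dmaData, len, memoryImage.begin() + offset);
        return memoryImage;
    }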
dmaRequestQueue_in.recycle(); + } + + action(l_popMemQueue, "q", desc="Pop off-chip request queue") { + memQueue_in.dequeue(); + } + action(m_writeDataToMemory, "m", desc="Write dirty writeback to memory") { peek(responseNetwork_in, ResponseMsg) { directory[in_msg.Address].DataBlk := in_msg.DataBlk; @@ -291,18 +728,15 @@ machine(Directory, "Token protocol") { } } - action(n_checkIncomingMsg, "n", desc="Check incoming token message") { + action(n_checkData, "n", desc="Check incoming clean data message") { peek(responseNetwork_in, ResponseMsg) { - assert(in_msg.Type == CoherenceResponseType:ACK_OWNER); - assert(in_msg.Dirty == false); - assert(in_msg.MessageSize == MessageSizeType:Writeback_Control); assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk); } } action(r_bounceResponse, "r", desc="Bounce response to starving processor") { peek(responseNetwork_in, ResponseMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -316,7 +750,20 @@ machine(Directory, "Token protocol") { } } - action(s_bounceDatalessOwnerToken, "s", desc="Bounce clean owner token to starving processor") { + action(st_scheduleTimeout, "st", desc="Schedule Timeout") { + // + // currently only support a fixed timeout latency + // + reissueTimerTable.set(address, fixed_timeout_latency); + } + + action(ut_unsetReissueTimer, "ut", desc="Unset reissue timer.") { + if (reissueTimerTable.isSet(address)) { + reissueTimerTable.unset(address); + } + } + + action(bd_bounceDatalessOwnerToken, "bd", desc="Bounce clean owner token to starving processor") { peek(responseNetwork_in, ResponseMsg) { assert(in_msg.Type == CoherenceResponseType:ACK_OWNER); assert(in_msg.Dirty == false); @@ -331,7 +778,7 @@ machine(Directory, "Token protocol") { // Bounce the message, but "re-associate" the data and the owner // token. In essence we're converting an ACK_OWNER message to a // DATA_OWNER message, keeping the number of tokens the same. 
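The bounce described in the comment above converts a clean, dataless ACK_OWNER into a DATA_OWNER by re-attaching the directory's own copy of the line, leaving the token count unchanged so token conservation still holds. A rough C++ illustration of the message rewrite (Msg is an invented type for this example, not the SLICC ResponseMsg):

    #include <array>
    #include <cassert>
    #include <cstdint>

    enum class RespType { ACK_OWNER, DATA_OWNER };

    struct Msg {
        RespType type;
        int tokens;                      // includes the owner token
        bool dirty;
        std::array<std::uint8_t, 64> data;
    };

    // Re-associate the directory's memory copy with a clean owner-token ack.
    Msg bounceDatalessOwnerToken(Msg in, const std::array<std::uint8_t, 64>& memCopy) {
        assert(in.type == RespType::ACK_OWNER && !in.dirty);
        in.type = RespType::DATA_OWNER;  // same token count, now carrying data
        in.data = memCopy;
        return in;
    }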
- enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -346,53 +793,212 @@ machine(Directory, "Token protocol") { } } + action(da_sendDmaAck, "da", desc="Send Ack to DMA controller") { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:ACK; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + + action(dm_sendMemoryDataToDma, "dm", desc="Send Data to DMA controller from memory") { + peek(memQueue_in, MemoryMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(dd_sendDmaData, "dd", desc="Send Data to DMA controller") { + peek(responseNetwork_in, ResponseMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } // TRANSITIONS - // Trans. from O - transition(O, GETX, NO) { - d_sendDataWithAllTokens; + // + // Trans. 
from base state O + // the directory has valid data + // + transition(O, GETX, NO_W) { + qf_queueMemoryFetchRequest; j_popIncomingRequestQueue; } - transition(O, GETS, NO) { - d_sendDataWithAllTokens; + transition(O, DMA_WRITE, O_DW) { + vd_allocateDmaRequestInTBE; + bw_broadcastWrite; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + + transition(O, DMA_WRITE_All_Tokens, O_DW_W) { + vd_allocateDmaRequestInTBE; + cd_writeCleanDataToTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + p_popDmaRequestQueue; + } + + transition(O, GETS, NO_W) { + qf_queueMemoryFetchRequest; j_popIncomingRequestQueue; } - transition(O, Lockdown, L) { - dd_sendDataWithAllTokensToStarver; + transition(O, DMA_READ, O_DR_W) { + vd_allocateDmaRequestInTBE; + fd_memoryDma; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + + transition(O, Lockdown, L_W) { + qf_queueMemoryFetchRequest; + l_popIncomingPersistentQueue; + } + + transition(O, {Tokens, Ack_All_Tokens}) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + transition(O, {Data_Owner, Data_All_Tokens}) { + n_checkData; + f_incrementTokens; + k_popIncomingResponseQueue; + } + + // + // transitioning to Owner, waiting for memory before DMA ack + // All other events should recycle/stall + // + transition(O_DR_W, Memory_Data, O) { + dm_sendMemoryDataToDma; + ut_unsetReissueTimer; + s_deallocateTBE; + l_popMemQueue; + } + + // + // issued GETX for DMA write, waiting for all tokens + // + transition(O_DW, Tokens) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + transition(O_DW, Data_Owner) { + f_incrementTokens; + rd_recordDataInTbe; + k_popIncomingResponseQueue; + } + + transition(O_DW, Ack_Owner) { + f_incrementTokens; + cd_writeCleanDataToTbe; + k_popIncomingResponseQueue; + } + + transition(O_DW, Lockdown, DW_L) { l_popIncomingPersistentQueue; } - transition(O, Tokens) { + transition({NO_DW, O_DW}, Data_All_Tokens, O_DW_W) { f_incrementTokens; + rd_recordDataInTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; k_popIncomingResponseQueue; } + transition(O_DW, Ack_All_Tokens, O_DW_W) { + f_incrementTokens; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; + k_popIncomingResponseQueue; + } + + transition(O_DW, Ack_Owner_All_Tokens, O_DW_W) { + f_incrementTokens; + cd_writeCleanDataToTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; + k_popIncomingResponseQueue; + } + + transition(O_DW_W, Memory_Ack, O) { + da_sendDmaAck; + s_deallocateTBE; + l_popMemQueue; + } + + // // Trans. 
from NO
+  // The directory does not have valid data, but may have some tokens
+  //
   transition(NO, GETX) {
     a_sendTokens;
     j_popIncomingRequestQueue;
   }
+  transition(NO, DMA_WRITE, NO_DW) {
+    vd_allocateDmaRequestInTBE;
+    bw_broadcastWrite;
+    st_scheduleTimeout;
+    p_popDmaRequestQueue;
+  }
+
   transition(NO, GETS) {
     j_popIncomingRequestQueue;
   }
+  transition(NO, DMA_READ, NO_DR) {
+    vd_allocateDmaRequestInTBE;
+    br_broadcastRead;
+    st_scheduleTimeout;
+    p_popDmaRequestQueue;
+  }
+
   transition(NO, Lockdown, L) {
     aa_sendTokensToStarver;
     l_popIncomingPersistentQueue;
   }
-  transition(NO, Data_Owner, O) {
+  transition(NO, {Data_Owner, Data_All_Tokens}, O_W) {
     m_writeDataToMemory;
     f_incrementTokens;
+    lq_queueMemoryWbRequest;
     k_popIncomingResponseQueue;
   }
-  transition(NO, Ack_Owner, O) {
-    n_checkIncomingMsg;
+  transition(NO, {Ack_Owner, Ack_Owner_All_Tokens}, O) {
+    n_checkData;
     f_incrementTokens;
     k_popIncomingResponseQueue;
   }
@@ -402,34 +1008,156 @@ machine(Directory, "Token protocol") {
     k_popIncomingResponseQueue;
   }
+  transition(NO_W, Memory_Data, NO) {
+    d_sendMemoryDataWithAllTokens;
+    l_popMemQueue;
+  }
+
+  // Trans. from NO_DW
+  transition(NO_DW, Request_Timeout) {
+    ut_unsetReissueTimer;
+    px_tryIssuingPersistentGETXRequest;
+  }
+
+  transition(NO_DW, Lockdown, DW_L) {
+    aa_sendTokensToStarver;
+    l_popIncomingPersistentQueue;
+  }
+
+  // Note: NO_DW, Data_All_Tokens transition is combined with O_DW
+  // Note: NO_DW should not receive the event Ack_All_Tokens because the
+  // directory does not have valid data
+
+  transition(NO_DW, Data_Owner, O_DW) {
+    f_incrementTokens;
+    rd_recordDataInTbe;
+    lq_queueMemoryWbRequest;
+    k_popIncomingResponseQueue;
+  }
+
+  transition({NO_DW, NO_DR}, Tokens) {
+    f_incrementTokens;
+    k_popIncomingResponseQueue;
+  }
+
+  // Trans. from NO_DR
+  transition(NO_DR, Request_Timeout) {
+    ut_unsetReissueTimer;
+    ps_tryIssuingPersistentGETSRequest;
+  }
+
+  transition(NO_DR, Lockdown, DR_L) {
+    aa_sendTokensToStarver;
+    l_popIncomingPersistentQueue;
+  }
+
+  transition(NO_DR, {Data_Owner, Data_All_Tokens}, O_W) {
+    m_writeDataToMemory;
+    f_incrementTokens;
+    dd_sendDmaData;
+    lr_queueMemoryDmaReadWriteback;
+    ut_unsetReissueTimer;
+    s_deallocateTBE;
+    k_popIncomingResponseQueue;
+  }
+
  // Trans.
from L - transition(L, {GETX, GETS}) { + transition({L, DW_L, DR_L}, {GETX, GETS}) { j_popIncomingRequestQueue; } - transition(L, Lockdown) { + transition({L, DW_L, DR_L, L_W, DR_L_W}, Lockdown) { l_popIncomingPersistentQueue; } - // we could change this to write the data to memory and send it cleanly - transition(L, Data_Owner) { + // + // Received data for lockdown blocks + // For blocks with outstanding dma requests to them + // ...we could change this to write the data to memory and send it cleanly + // ...we could also proactively complete our DMA requests + // However, to keep my mind from spinning out-of-control, we won't for now :) + // + transition({DW_L, DR_L, L}, {Data_Owner, Data_All_Tokens}) { r_bounceResponse; k_popIncomingResponseQueue; } - transition(L, Tokens) { + transition({DW_L, DR_L, L}, Tokens) { r_bounceResponse; k_popIncomingResponseQueue; } - transition(L, Ack_Owner) { - s_bounceDatalessOwnerToken; + transition({DW_L, DR_L, L}, {Ack_Owner_All_Tokens, Ack_Owner}) { + bd_bounceDatalessOwnerToken; k_popIncomingResponseQueue; } - transition(L, Unlockdown, NO) { l_popIncomingPersistentQueue; } + transition(L_W, Memory_Data, L) { + dd_sendDataWithAllTokensToStarver; + l_popMemQueue; + } + + transition(DR_L_W, Memory_Data, DR_L) { + dd_sendDataWithAllTokensToStarver; + l_popMemQueue; + } + + transition(DW_L, {Unlockdown, Own_Lock_or_Unlock}, NO_DW) { + l_popIncomingPersistentQueue; + } + + transition(DR_L_W, {Unlockdown, Own_Lock_or_Unlock}, O_DR_W) { + l_popIncomingPersistentQueue; + } + + transition({DW_L, DR_L_W}, Request_Timeout) { + ut_unsetReissueTimer; + px_tryIssuingPersistentGETXRequest; + } + + transition(DR_L, {Unlockdown, Own_Lock_or_Unlock}, NO_DR) { + l_popIncomingPersistentQueue; + } + + transition(DR_L, Request_Timeout) { + ut_unsetReissueTimer; + ps_tryIssuingPersistentGETSRequest; + } + + transition(O_W, Memory_Ack, O) { + l_popMemQueue; + } + + transition({O, NO, L, O_DW, NO_DW, NO_DR}, Own_Lock_or_Unlock) { + l_popIncomingPersistentQueue; + } + + // Blocked states + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W, O_DW, NO_DW, NO_DR}, {GETX, GETS}) { + z_recycleRequest; + } + + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W, O_DW, NO_DW, NO_DR, L, DW_L, DR_L}, {DMA_READ, DMA_WRITE}) { + y_recycleDmaRequestQueue; + } + + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W}, {Data_Owner, Ack_Owner, Tokens}) { + kz_recycleResponse; + } + + transition({NO_W, O_W}, Lockdown, L_W) { + l_popIncomingPersistentQueue; + } + + transition(O_DR_W, Lockdown, DR_L_W) { + l_popIncomingPersistentQueue; + } + + transition({NO_W, O_W, O_DR_W}, {Unlockdown, Own_Lock_or_Unlock}) { + l_popIncomingPersistentQueue; + } } diff --git a/src/mem/protocol/MOESI_CMP_token-dma.sm b/src/mem/protocol/MOESI_CMP_token-dma.sm new file mode 100644 index 000000000..550a36ae0 --- /dev/null +++ b/src/mem/protocol/MOESI_CMP_token-dma.sm @@ -0,0 +1,165 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +machine(DMA, "DMA Controller") +: int request_latency +{ + + MessageBuffer responseFromDir, network="From", virtual_network="0", ordered="true", no_vector="true"; + MessageBuffer reqToDirectory, network="To", virtual_network="5", ordered="false", no_vector="true"; + + enumeration(State, desc="DMA states", default="DMA_State_READY") { + READY, desc="Ready to accept a new request"; + BUSY_RD, desc="Busy: currently processing a request"; + BUSY_WR, desc="Busy: currently processing a request"; + } + + enumeration(Event, desc="DMA events") { + ReadRequest, desc="A new read request"; + WriteRequest, desc="A new write request"; + Data, desc="Data from a DMA memory read"; + Ack, desc="DMA write to memory completed"; + } + + external_type(DMASequencer) { + void ackCallback(); + void dataCallback(DataBlock); + } + + MessageBuffer mandatoryQueue, ordered="false", no_vector="true"; + DMASequencer dma_sequencer, factory='RubySystem::getDMASequencer(m_cfg["dma_sequencer"])', no_vector="true"; + State cur_state, no_vector="true"; + + State getState(Address addr) { + return cur_state; + } + void setState(Address addr, State state) { + cur_state := state; + } + + out_port(reqToDirectory_out, DMARequestMsg, reqToDirectory, desc="..."); + + in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, SequencerMsg) { + if (in_msg.Type == SequencerRequestType:LD ) { + trigger(Event:ReadRequest, in_msg.LineAddress); + } else if (in_msg.Type == SequencerRequestType:ST) { + trigger(Event:WriteRequest, in_msg.LineAddress); + } else { + error("Invalid request type"); + } + } + } + } + + in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") { + if (dmaResponseQueue_in.isReady()) { + peek( dmaResponseQueue_in, DMAResponseMsg) { + if (in_msg.Type == DMAResponseType:ACK) { + trigger(Event:Ack, in_msg.LineAddress); + } else if (in_msg.Type == DMAResponseType:DATA) { + trigger(Event:Data, in_msg.LineAddress); + } else { + error("Invalid 
response type"); + } + } + } + } + + action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:READ; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:WRITE; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.ackCallback(); + } + } + + action(d_dataCallback, "d", desc="Write data to dma sequencer") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.dataCallback(in_msg.DataBlk); + } + } + + action(p_popRequestQueue, "p", desc="Pop request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(p_popResponseQueue, "\p", desc="Pop request queue") { + dmaResponseQueue_in.dequeue(); + } + + transition(READY, ReadRequest, BUSY_RD) { + s_sendReadRequest; + p_popRequestQueue; + } + + transition(READY, WriteRequest, BUSY_WR) { + s_sendWriteRequest; + p_popRequestQueue; + } + + transition(BUSY_RD, Data, READY) { + d_dataCallback; + p_popResponseQueue; + } + + transition(BUSY_WR, Ack, READY) { + a_ackCallback; + p_popResponseQueue; + } +} diff --git a/src/mem/protocol/MOESI_CMP_token-msg.sm b/src/mem/protocol/MOESI_CMP_token-msg.sm index 2a75ce644..40c16b5e1 100644 --- a/src/mem/protocol/MOESI_CMP_token-msg.sm +++ b/src/mem/protocol/MOESI_CMP_token-msg.sm @@ -59,8 +59,10 @@ enumeration(CoherenceResponseType, desc="...") { // TriggerType enumeration(TriggerType, desc="...") { - REQUEST_TIMEOUT, desc="See corresponding event"; + REQUEST_TIMEOUT, desc="See corresponding event"; USE_TIMEOUT, desc="See corresponding event"; + DATA, desc="data for dma read response"; + DATA_ALL_TOKENS, desc="data and all tokens for dma write response"; } // TriggerMsg @@ -111,13 +113,45 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") { MessageSizeType MessageSize, desc="size category of the message"; } -GenericRequestType convertToGenericType(CoherenceRequestType type) { - if(type == CoherenceRequestType:GETS) { - return GenericRequestType:GETS; - } else if(type == CoherenceRequestType:GETX) { - return GenericRequestType:GETX; - } else { - DEBUG_EXPR(type); - error("invalid CoherenceRequestType"); - } +enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") { + READ, desc="Memory Read"; + WRITE, desc="Memory Write"; + NULL, desc="Invalid"; } + +enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") { + DATA, desc="DATA read"; + ACK, desc="ACK write"; + NULL, desc="Invalid"; +} + +structure(DMARequestMsg, desc="...", interface="NetworkMessage") { + DMARequestType 
Type, desc="Request type (read/write)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + MachineID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + int Len, desc="The length of the request"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +structure(DMAResponseMsg, desc="...", interface="NetworkMessage") { + DMAResponseType Type, desc="Response type (DATA/ACK)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +//GenericRequestType convertToGenericType(CoherenceRequestType type) { +// if(type == CoherenceRequestType:GETS) { +// return GenericRequestType:GETS; +// } else if(type == CoherenceRequestType:GETX) { +// return GenericRequestType:GETX; +// } else { +// DEBUG_EXPR(type); +// error("invalid CoherenceRequestType"); +// } +//} diff --git a/src/mem/protocol/MOESI_CMP_token.slicc b/src/mem/protocol/MOESI_CMP_token.slicc index ae4a6d6ec..a41226f90 100644 --- a/src/mem/protocol/MOESI_CMP_token.slicc +++ b/src/mem/protocol/MOESI_CMP_token.slicc @@ -2,4 +2,5 @@ MOESI_CMP_token-msg.sm MOESI_CMP_token-L1cache.sm MOESI_CMP_token-L2cache.sm MOESI_CMP_token-dir.sm +MOESI_CMP_token-dma.sm standard_CMP-protocol.sm diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm new file mode 100644 index 000000000..3b2240800 --- /dev/null +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -0,0 +1,1132 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 2009 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * AMD's contributions to the MOESI hammer protocol do not constitute an + * endorsement of its similarity to any AMD products. + * + * Authors: Milo Martin + * Brad Beckmann + */ + +machine(L1Cache, "AMD Hammer-like protocol") +: int cache_response_latency, + int issue_latency +{ + + // NETWORK BUFFERS + MessageBuffer requestFromCache, network="To", virtual_network="3", ordered="false"; + MessageBuffer responseFromCache, network="To", virtual_network="1", ordered="false"; + MessageBuffer unblockFromCache, network="To", virtual_network="0", ordered="false"; + + MessageBuffer forwardToCache, network="From", virtual_network="2", ordered="false"; + MessageBuffer responseToCache, network="From", virtual_network="1", ordered="false"; + + + // STATES + enumeration(State, desc="Cache states", default="L1Cache_State_I") { + // Base states + I, desc="Idle"; + S, desc="Shared"; + O, desc="Owned"; + M, desc="Modified (dirty)"; + MM, desc="Modified (dirty and locally modified)"; + + // Transient States + IM, "IM", desc="Issued GetX"; + SM, "SM", desc="Issued GetX, we still have an old copy of the line"; + OM, "OM", desc="Issued GetX, received data"; + ISM, "ISM", desc="Issued GetX, received data, waiting for all acks"; + M_W, "M^W", desc="Issued GetS, received exclusive data"; + MM_W, "MM^W", desc="Issued GetX, received exclusive data"; + IS, "IS", desc="Issued GetS"; + SS, "SS", desc="Issued GetS, received data, waiting for all acks"; + OI, "OI", desc="Issued PutO, waiting for ack"; + MI, "MI", desc="Issued PutX, waiting for ack"; + II, "II", desc="Issued PutX/O, saw Other_GETS or Other_GETX, waiting for ack"; + } + + // EVENTS + enumeration(Event, desc="Cache events") { + Load, desc="Load request from the processor"; + Ifetch, desc="I-fetch request from the processor"; + Store, desc="Store request from the processor"; + L2_Replacement, desc="L2 Replacement"; + L1_to_L2, desc="L1 to L2 transfer"; + L2_to_L1D, desc="L2 to L1-Data transfer"; + L2_to_L1I, desc="L2 to L1-Instruction transfer"; + + // Requests + Other_GETX, desc="A GetX from another processor"; + Other_GETS, desc="A GetS from another processor"; + + // Responses + Ack, desc="Received an ack message"; + Shared_Ack, desc="Received an ack message, responder has a shared copy"; + Data, desc="Received a data message"; + Shared_Data, desc="Received a data message, responder has a shared copy"; + Exclusive_Data, desc="Received a data message, responder had an exclusive copy, they gave it to us"; + + Writeback_Ack, desc="Writeback O.K. from directory"; + Writeback_Nack, desc="Writeback not O.K. 
from directory"; + + // Triggers + All_acks, desc="Received all required data and message acks"; + All_acks_no_sharers, desc="Received all acks and no other processor has a shared copy"; + } + + // TYPES + + // STRUCTURE DEFINITIONS + + MessageBuffer mandatoryQueue, ordered="false"; + Sequencer sequencer, factory='RubySystem::getSequencer(m_cfg["sequencer"])'; + + // CacheEntry + structure(Entry, desc="...", interface="AbstractCacheEntry") { + State CacheState, desc="cache state"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + DataBlock DataBlk, desc="data for the block"; + } + + // TBE fields + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block, required for concurrent writebacks"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for"; + bool Sharers, desc="On a GetS, did we find any other sharers in the system"; + } + + external_type(CacheMemory) { + bool cacheAvail(Address); + Address cacheProbe(Address); + void allocate(Address, Entry); + void deallocate(Address); + Entry lookup(Address); + void changePermission(Address, AccessPermission); + bool isTagPresent(Address); + void profileMiss(CacheMsg); + } + + external_type(TBETable) { + TBE lookup(Address); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } + + TBETable TBEs, template_hack="<L1Cache_TBE>"; + CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])'; + CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])'; + CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["l2cache"])'; + + Entry getCacheEntry(Address addr), return_by_ref="yes" { + if (L2cacheMemory.isTagPresent(addr)) { + return L2cacheMemory[addr]; + } else if (L1DcacheMemory.isTagPresent(addr)) { + return L1DcacheMemory[addr]; + } else { + return L1IcacheMemory[addr]; + } + } + + void changePermission(Address addr, AccessPermission permission) { + if (L2cacheMemory.isTagPresent(addr)) { + return L2cacheMemory.changePermission(addr, permission); + } else if (L1DcacheMemory.isTagPresent(addr)) { + return L1DcacheMemory.changePermission(addr, permission); + } else { + return L1IcacheMemory.changePermission(addr, permission); + } + } + + bool isCacheTagPresent(Address addr) { + return (L2cacheMemory.isTagPresent(addr) || L1DcacheMemory.isTagPresent(addr) || L1IcacheMemory.isTagPresent(addr)); + } + + State getState(Address addr) { + assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false); + assert((L1IcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false); + assert((L1DcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false); + + if(TBEs.isPresent(addr)) { + return TBEs[addr].TBEState; + } else if (isCacheTagPresent(addr)) { + return getCacheEntry(addr).CacheState; + } + return State:I; + } + + void setState(Address addr, State state) { + assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false); + assert((L1IcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false); + assert((L1DcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false); + + if (TBEs.isPresent(addr)) { + TBEs[addr].TBEState := state; + } + + if (isCacheTagPresent(addr)) { + getCacheEntry(addr).CacheState := state; + + // Set permission + if ((state == State:MM) || + (state == 
State:MM_W)) {
+        changePermission(addr, AccessPermission:Read_Write);
+      } else if (state == State:S ||
+                 state == State:O ||
+                 state == State:M ||
+                 state == State:M_W ||
+                 state == State:SM ||
+                 state == State:ISM ||
+                 state == State:OM ||
+                 state == State:SS) {
+        changePermission(addr, AccessPermission:Read_Only);
+      } else {
+        changePermission(addr, AccessPermission:Invalid);
+      }
+    }
+  }
+
+  Event mandatory_request_type_to_event(CacheRequestType type) {
+    if (type == CacheRequestType:LD) {
+      return Event:Load;
+    } else if (type == CacheRequestType:IFETCH) {
+      return Event:Ifetch;
+    } else if ((type == CacheRequestType:ST) || (type == CacheRequestType:ATOMIC)) {
+      return Event:Store;
+    } else {
+      error("Invalid CacheRequestType");
+    }
+  }
+
+  MessageBuffer triggerQueue, ordered="true";
+
+  // ** OUT_PORTS **
+
+  out_port(requestNetwork_out, RequestMsg, requestFromCache);
+  out_port(responseNetwork_out, ResponseMsg, responseFromCache);
+  out_port(unblockNetwork_out, ResponseMsg, unblockFromCache);
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+
+  // ** IN_PORTS **
+
+  // Trigger Queue
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue) {
+    if (triggerQueue_in.isReady()) {
+      peek(triggerQueue_in, TriggerMsg) {
+        if (in_msg.Type == TriggerType:ALL_ACKS) {
+          trigger(Event:All_acks, in_msg.Address);
+        } else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) {
+          trigger(Event:All_acks_no_sharers, in_msg.Address);
+        } else {
+          error("Unexpected message");
+        }
+      }
+    }
+  }
+
+  // Nothing from the request network
+
+  // Forward Network
+  in_port(forwardToCache_in, RequestMsg, forwardToCache) {
+    if (forwardToCache_in.isReady()) {
+      peek(forwardToCache_in, RequestMsg) {
+        if (in_msg.Type == CoherenceRequestType:GETX) {
+          trigger(Event:Other_GETX, in_msg.Address);
+        } else if (in_msg.Type == CoherenceRequestType:GETS) {
+          trigger(Event:Other_GETS, in_msg.Address);
+        } else if (in_msg.Type == CoherenceRequestType:WB_ACK) {
+          trigger(Event:Writeback_Ack, in_msg.Address);
+        } else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
+          trigger(Event:Writeback_Nack, in_msg.Address);
+        } else {
+          error("Unexpected message");
+        }
+      }
+    }
+  }
+
+  // Response Network
+  in_port(responseToCache_in, ResponseMsg, responseToCache) {
+    if (responseToCache_in.isReady()) {
+      peek(responseToCache_in, ResponseMsg) {
+        if (in_msg.Type == CoherenceResponseType:ACK) {
+          trigger(Event:Ack, in_msg.Address);
+        } else if (in_msg.Type == CoherenceResponseType:ACK_SHARED) {
+          trigger(Event:Shared_Ack, in_msg.Address);
+        } else if (in_msg.Type == CoherenceResponseType:DATA) {
+          trigger(Event:Data, in_msg.Address);
+        } else if (in_msg.Type == CoherenceResponseType:DATA_SHARED) {
+          trigger(Event:Shared_Data, in_msg.Address);
+        } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
+          trigger(Event:Exclusive_Data, in_msg.Address);
+        } else {
+          error("Unexpected message");
+        }
+      }
+    }
+  }
+
+  // Nothing from the unblock network
+
+  // Mandatory Queue
+  in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
+    if (mandatoryQueue_in.isReady()) {
+      peek(mandatoryQueue_in, CacheMsg) {
+
+        // Check for data access to blocks in I-cache and ifetches to blocks in D-cache
+
+        if (in_msg.Type == CacheRequestType:IFETCH) {
+          // ** INSTRUCTION ACCESS ***
+
+          // Check to see if it is in the OTHER L1
+          if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) {
+            // The block is in the wrong L1, try to write it to the L2
+            if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
+              trigger(Event:L1_to_L2, in_msg.LineAddress);
+            } else {
+              trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.LineAddress));
+            }
+          }
+
+          if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) {
+            // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion
+            trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
+          } else {
+            if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
+              // L1 doesn't have the line, but we have space for it in the L1
+              if (L2cacheMemory.isTagPresent(in_msg.LineAddress)) {
+                // L2 has it (maybe not with the right permissions)
+                trigger(Event:L2_to_L1I, in_msg.LineAddress);
+              } else {
+                // We have room, the L2 doesn't have it, so the L1 fetches the line
+                trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
+              }
+            } else {
+              // No room in the L1, so we need to make room
+              if (L2cacheMemory.cacheAvail(L1IcacheMemory.cacheProbe(in_msg.LineAddress))) {
+                // The L2 has room, so we move the line from the L1 to the L2
+                trigger(Event:L1_to_L2, L1IcacheMemory.cacheProbe(in_msg.LineAddress));
+              } else {
+                // The L2 does not have room, so we replace a line from the L2
+                trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1IcacheMemory.cacheProbe(in_msg.LineAddress)));
+              }
+            }
+          }
+        } else {
+          // *** DATA ACCESS ***
+
+          // Check to see if it is in the OTHER L1
+          if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) {
+            // The block is in the wrong L1, try to write it to the L2
+            if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) {
+              trigger(Event:L1_to_L2, in_msg.LineAddress);
+            } else {
+              trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.LineAddress));
+            }
+          }
+
+          if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) {
+            // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion
+            trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
+          } else {
+            if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
+              // L1 doesn't have the line, but we have space for it in the L1
+              if (L2cacheMemory.isTagPresent(in_msg.LineAddress)) {
+                // L2 has it (maybe not with the right permissions)
+                trigger(Event:L2_to_L1D, in_msg.LineAddress);
+              } else {
+                // We have room, the L2 doesn't have it, so the L1 fetches the line
+                trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
+              }
+            } else {
+              // No room in the L1, so we need to make room
+              if (L2cacheMemory.cacheAvail(L1DcacheMemory.cacheProbe(in_msg.LineAddress))) {
+                // The L2 has room, so we move the line from the L1 to the L2
+                trigger(Event:L1_to_L2, L1DcacheMemory.cacheProbe(in_msg.LineAddress));
+              } else {
+                // The L2 does not have room, so we replace a line from the L2
+                trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1DcacheMemory.cacheProbe(in_msg.LineAddress)));
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // ACTIONS
+
+  action(a_issueGETS, "a", desc="Issue GETS") {
+    enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
+      out_msg.Address := address;
+      out_msg.Type := CoherenceRequestType:GETS;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); // One from each other cache (n-1) plus the memory (+1)
+    }
+  }
+
+  action(b_issueGETX, "b", desc="Issue GETX") {
+    enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
+      out_msg.Address := address;
+      out_msg.Type := CoherenceRequestType:GETX;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); // One from each other cache (n-1) plus the memory (+1)
+    }
+  }
+
+  action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") {
+    peek(forwardToCache_in, RequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := getCacheEntry(address).DataBlk;
+        out_msg.Dirty := getCacheEntry(address).Dirty;
+        out_msg.Acks := 2;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+      }
+    }
+  }
+
+  action(d_issuePUT, "d", desc="Issue PUT") {
+    enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
+      out_msg.Address := address;
+      out_msg.Type := CoherenceRequestType:PUT;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Writeback_Control;
+    }
+  }
+
+  action(e_sendData, "e", desc="Send data from cache to requestor") {
+    peek(forwardToCache_in, RequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DATA;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := getCacheEntry(address).DataBlk;
+        out_msg.Dirty := getCacheEntry(address).Dirty;
+        out_msg.Acks := 2;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+      }
+    }
+  }
+
+  action(ee_sendDataShared, "\e", desc="Send data from cache to requestor, keep a shared copy") {
keep a shared copy") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA_SHARED; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := getCacheEntry(address).DataBlk; + out_msg.Dirty := getCacheEntry(address).Dirty; + out_msg.Acks := 2; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(f_sendAck, "f", desc="Send ack from cache to requestor") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:ACK; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.Acks := 1; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + + action(ff_sendAckShared, "\f", desc="Send shared ack from cache to requestor") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:ACK_SHARED; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.Acks := 1; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + + action(g_sendUnblock, "g", desc="Send unblock to memory") { + enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:UNBLOCK; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Unblock_Control; + } + } + + action(h_load_hit, "h", desc="Notify sequencer the load completed.") { + DEBUG_EXPR(getCacheEntry(address).DataBlk); + sequencer.readCallback(address, getCacheEntry(address).DataBlk); + } + + action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") { + DEBUG_EXPR(getCacheEntry(address).DataBlk); + sequencer.writeCallback(address, getCacheEntry(address).DataBlk); + getCacheEntry(address).Dirty := true; + } + + action(i_allocateTBE, "i", desc="Allocate TBE") { + check_allocate(TBEs); + TBEs.allocate(address); + TBEs[address].DataBlk := getCacheEntry(address).DataBlk; // Data only used for writebacks + TBEs[address].Dirty := getCacheEntry(address).Dirty; + TBEs[address].Sharers := false; + } + + action(j_popTriggerQueue, "j", desc="Pop trigger queue.") { + triggerQueue_in.dequeue(); + } + + action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") { + mandatoryQueue_in.dequeue(); + } + + action(l_popForwardQueue, "l", desc="Pop forwareded request queue.") { + forwardToCache_in.dequeue(); + } + + action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") { + peek(responseToCache_in, ResponseMsg) { + assert(in_msg.Acks > 0); + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - in_msg.Acks; + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + } + } + + action(n_popResponseQueue, "n", desc="Pop response queue") { + responseToCache_in.dequeue(); + } + + action(o_checkForCompletion, "o", desc="Check if we have received all the messages required for completion") { + if (TBEs[address].NumPendingMsgs == 0) { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.Address := address; + if (TBEs[address].Sharers) { + out_msg.Type := TriggerType:ALL_ACKS; + } else { + 
out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS; + } + } + } + } + + action(p_decrementNumberOfMessagesByOne, "p", desc="Decrement the number of messages for which we're waiting by one") { + TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - 1; + } + + action(pp_incrementNumberOfMessagesByOne, "\p", desc="Increment the number of messages for which we're waiting by one") { + TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs + 1; + } + + action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.Dirty := TBEs[address].Dirty; + out_msg.Acks := 2; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(qq_sendDataFromTBEToMemory, "\q", desc="Send data from TBE to memory") { + enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { + out_msg.Address := address; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Dirty := TBEs[address].Dirty; + if (TBEs[address].Dirty) { + out_msg.Type := CoherenceResponseType:WB_DIRTY; + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } else { + out_msg.Type := CoherenceResponseType:WB_CLEAN; + // NOTE: in a real system this would not send data. We send + // data here only so we can check it at the memory + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(r_setSharerBit, "r", desc="We saw other sharers") { + TBEs[address].Sharers := true; + } + + action(s_deallocateTBE, "s", desc="Deallocate TBE") { + TBEs.deallocate(address); + } + + action(t_sendExclusiveDataFromTBEToMemory, "t", desc="Send exclusive data from TBE to memory") { + enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { + out_msg.Address := address; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.Dirty := TBEs[address].Dirty; + if (TBEs[address].Dirty) { + out_msg.Type := CoherenceResponseType:WB_EXCLUSIVE_DIRTY; + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } else { + out_msg.Type := CoherenceResponseType:WB_EXCLUSIVE_CLEAN; + // NOTE: in a real system this would not send data. We send + // data here only so we can check it at the memory + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(u_writeDataToCache, "u", desc="Write data to cache") { + peek(responseToCache_in, ResponseMsg) { + getCacheEntry(address).DataBlk := in_msg.DataBlk; + getCacheEntry(address).Dirty := in_msg.Dirty; + } + } + + action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") { + peek(responseToCache_in, ResponseMsg) { + assert(getCacheEntry(address).DataBlk == in_msg.DataBlk); + getCacheEntry(address).DataBlk := in_msg.DataBlk; + getCacheEntry(address).Dirty := in_msg.Dirty; + } + } + + action(gg_deallocateL1CacheBlock, "\g", desc="Deallocate cache block. 
Sets the cache to invalid, allowing a replacement in parallel with a fetch.") { + if (L1DcacheMemory.isTagPresent(address)) { + L1DcacheMemory.deallocate(address); + } else { + L1IcacheMemory.deallocate(address); + } + } + + action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") { + if (L1DcacheMemory.isTagPresent(address) == false) { + L1DcacheMemory.allocate(address, new Entry); + } + } + + action(jj_allocateL1ICacheBlock, "\j", desc="Set L1 I-cache tag equal to tag of block B.") { + if (L1IcacheMemory.isTagPresent(address) == false) { + L1IcacheMemory.allocate(address, new Entry); + } + } + + action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") { + L2cacheMemory.allocate(address, new Entry); + } + + action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") { + L2cacheMemory.deallocate(address); + } + + action(ss_copyFromL1toL2, "\s", desc="Copy data block from L1 (I or D) to L2") { + if (L1DcacheMemory.isTagPresent(address)) { + L2cacheMemory[address] := L1DcacheMemory[address]; + } else { + L2cacheMemory[address] := L1IcacheMemory[address]; + } + } + + action(tt_copyFromL2toL1, "\t", desc="Copy data block from L2 to L1 (I or D)") { + if (L1DcacheMemory.isTagPresent(address)) { + L1DcacheMemory[address] := L2cacheMemory[address]; + } else { + L1IcacheMemory[address] := L2cacheMemory[address]; + } + } + + action(uu_profileMiss, "\u", desc="Profile the demand miss") { + peek(mandatoryQueue_in, CacheMsg) { + if (L1IcacheMemory.isTagPresent(address)) { + L1IcacheMemory.profileMiss(in_msg); + } else if (L1DcacheMemory.isTagPresent(address)) { + L1DcacheMemory.profileMiss(in_msg); + } else { + L2cacheMemory.profileMiss(in_msg); + } + } + } + + action(zz_recycleMandatoryQueue, "\z", desc="Send the head of the mandatory queue to the back of the queue.") { + mandatoryQueue_in.recycle(); + } + + //***************************************************** + // TRANSITIONS + //***************************************************** + + // Transitions for Load/Store/L2_Replacement from transient states + transition({IM, SM, ISM, OM, IS, SS, OI, MI, II}, {Store, L2_Replacement}) { + zz_recycleMandatoryQueue; + } + + transition({M_W, MM_W}, {L2_Replacement}) { + zz_recycleMandatoryQueue; + } + + transition({IM, IS, OI, MI, II}, {Load, Ifetch}) { + zz_recycleMandatoryQueue; + } + + transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II}, L1_to_L2) { + zz_recycleMandatoryQueue; + } + + // Transitions moving data between the L1 and L2 caches + transition({I, S, O, M, MM}, L1_to_L2) { + vv_allocateL2CacheBlock; + ss_copyFromL1toL2; // Not really needed for state I + gg_deallocateL1CacheBlock; + } + + transition({I, S, O, M, MM}, L2_to_L1D) { + ii_allocateL1DCacheBlock; + tt_copyFromL2toL1; // Not really needed for state I + rr_deallocateL2CacheBlock; + } + + transition({I, S, O, M, MM}, L2_to_L1I) { + jj_allocateL1ICacheBlock; + tt_copyFromL2toL1; // Not really needed for state I + rr_deallocateL2CacheBlock; + } + + // Transitions from Idle + transition(I, Load, IS) { + ii_allocateL1DCacheBlock; + i_allocateTBE; + a_issueGETS; + uu_profileMiss; + k_popMandatoryQueue; + } + + transition(I, Ifetch, IS) { + jj_allocateL1ICacheBlock; + i_allocateTBE; + a_issueGETS; + uu_profileMiss; + k_popMandatoryQueue; + } + + transition(I, Store, IM) { + ii_allocateL1DCacheBlock; + i_allocateTBE; + b_issueGETX; + uu_profileMiss; + k_popMandatoryQueue; + } + + 
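The request-classification logic in the mandatory-queue in_port at the top of this file is the subtle part of the exclusive L1/L2 hierarchy: a request either fires its demand event immediately, pulls the line up from the L2, demotes an L1 victim into the L2, or must first evict from the L2. A minimal Python sketch of that decision tree follows (illustrative only, not part of the patch; l1, l2, and the method names are hypothetical stand-ins for the SLICC CacheMemory interface):

    def classify_request(l1, l2, addr):
        # Mirror of the per-request decision tree in the mandatory queue above.
        # l1/l2 are duck-typed objects with is_tag_present, cache_avail, and
        # cache_probe methods, analogous to the SLICC CacheMemory calls.
        if l1.is_tag_present(addr):
            # L1 hit: exclusion guarantees the L2 cannot also hold the line
            return ("MANDATORY", addr)
        if l1.cache_avail(addr):
            # L1 has a free way: fill from the L2 if it holds the line, else fetch
            if l2.is_tag_present(addr):
                return ("L2_TO_L1", addr)
            return ("MANDATORY", addr)
        victim = l1.cache_probe(addr)           # L1 set is full: pick an L1 victim
        if l2.cache_avail(victim):
            return ("L1_TO_L2", victim)         # demote the L1 victim into the L2
        return ("L2_REPLACEMENT", l2.cache_probe(victim))  # make room in the L2 first

The returned event names correspond to the L2_to_L1I/L2_to_L1D, L1_to_L2, and L2_Replacement triggers used by the transitions above and below.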
transition(I, L2_Replacement) { + rr_deallocateL2CacheBlock; + } + + transition(I, {Other_GETX, Other_GETS}) { + f_sendAck; + l_popForwardQueue; + } + + // Transitions from Shared + transition({S, SM, ISM}, {Load, Ifetch}) { + h_load_hit; + k_popMandatoryQueue; + } + + transition(S, Store, SM) { + i_allocateTBE; + b_issueGETX; + uu_profileMiss; + k_popMandatoryQueue; + } + + transition(S, L2_Replacement, I) { + rr_deallocateL2CacheBlock; + } + + transition(S, Other_GETX, I) { + f_sendAck; + l_popForwardQueue; + } + + transition(S, Other_GETS) { + ff_sendAckShared; + l_popForwardQueue; + } + + // Transitions from Owned + transition({O, OM, SS, MM_W, M_W}, {Load, Ifetch}) { + h_load_hit; + k_popMandatoryQueue; + } + + transition(O, Store, OM) { + i_allocateTBE; + b_issueGETX; + p_decrementNumberOfMessagesByOne; + uu_profileMiss; + k_popMandatoryQueue; + } + + transition(O, L2_Replacement, OI) { + i_allocateTBE; + d_issuePUT; + rr_deallocateL2CacheBlock; + } + + transition(O, Other_GETX, I) { + e_sendData; + l_popForwardQueue; + } + + transition(O, Other_GETS) { + ee_sendDataShared; + l_popForwardQueue; + } + + // Transitions from Modified + transition(MM, {Load, Ifetch}) { + h_load_hit; + k_popMandatoryQueue; + } + + transition(MM, Store) { + hh_store_hit; + k_popMandatoryQueue; + } + + transition(MM, L2_Replacement, MI) { + i_allocateTBE; + d_issuePUT; + rr_deallocateL2CacheBlock; + } + + transition(MM, Other_GETX, I) { + c_sendExclusiveData; + l_popForwardQueue; + } + + transition(MM, Other_GETS, I) { + c_sendExclusiveData; + l_popForwardQueue; + } + + // Transitions from Dirty Exclusive + transition(M, {Load, Ifetch}) { + h_load_hit; + k_popMandatoryQueue; + } + + transition(M, Store, MM) { + hh_store_hit; + k_popMandatoryQueue; + } + + transition(M, L2_Replacement, MI) { + i_allocateTBE; + d_issuePUT; + rr_deallocateL2CacheBlock; + } + + transition(M, Other_GETX, I) { + c_sendExclusiveData; + l_popForwardQueue; + } + + transition(M, Other_GETS, O) { + ee_sendDataShared; + l_popForwardQueue; + } + + // Transitions from IM + + transition(IM, {Other_GETX, Other_GETS}) { + f_sendAck; + l_popForwardQueue; + } + + transition(IM, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(IM, Data, ISM) { + u_writeDataToCache; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(IM, Exclusive_Data, MM_W) { + u_writeDataToCache; + m_decrementNumberOfMessages; + o_checkForCompletion; + hh_store_hit; + n_popResponseQueue; + } + + // Transitions from SM + transition(SM, Other_GETS) { + ff_sendAckShared; + l_popForwardQueue; + } + + transition(SM, Other_GETX, IM) { + f_sendAck; + l_popForwardQueue; + } + + transition(SM, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(SM, Data, ISM) { + v_writeDataToCacheVerify; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + // Transitions from ISM + transition(ISM, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(ISM, All_acks_no_sharers, MM) { + hh_store_hit; + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + // Transitions from OM + + transition(OM, Other_GETX, IM) { + e_sendData; + pp_incrementNumberOfMessagesByOne; + l_popForwardQueue; + } + + transition(OM, Other_GETS) { + ee_sendDataShared; + l_popForwardQueue; + } + + transition(OM, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; 
+ } + + transition(OM, {All_acks, All_acks_no_sharers}, MM) { + hh_store_hit; + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + // Transitions from IS + + transition(IS, {Other_GETX, Other_GETS}) { + f_sendAck; + l_popForwardQueue; + } + + transition(IS, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(IS, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(IS, Data, SS) { + u_writeDataToCache; + m_decrementNumberOfMessages; + o_checkForCompletion; + h_load_hit; + n_popResponseQueue; + } + + transition(IS, Exclusive_Data, M_W) { + u_writeDataToCache; + m_decrementNumberOfMessages; + o_checkForCompletion; + h_load_hit; + n_popResponseQueue; + } + + transition(IS, Shared_Data, SS) { + u_writeDataToCache; + r_setSharerBit; + m_decrementNumberOfMessages; + o_checkForCompletion; + h_load_hit; + n_popResponseQueue; + } + + // Transitions from SS + + transition(SS, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(SS, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(SS, All_acks, S) { + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + transition(SS, All_acks_no_sharers, S) { + // Note: The directory might still be the owner, so that is why we go to S + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + // Transitions from MM_W + + transition(MM_W, Store) { + hh_store_hit; + k_popMandatoryQueue; + } + + transition(MM_W, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(MM_W, All_acks_no_sharers, MM) { + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + // Transitions from M_W + + transition(M_W, Store, MM_W) { + hh_store_hit; + k_popMandatoryQueue; + } + + transition(M_W, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(M_W, All_acks_no_sharers, M) { + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + // Transitions from OI/MI + + transition({OI, MI}, Other_GETX, II) { + q_sendDataFromTBEToCache; + l_popForwardQueue; + } + + transition({OI, MI}, Other_GETS, OI) { + q_sendDataFromTBEToCache; + l_popForwardQueue; + } + + transition(MI, Writeback_Ack, I) { + t_sendExclusiveDataFromTBEToMemory; + s_deallocateTBE; + l_popForwardQueue; + } + + transition(OI, Writeback_Ack, I) { + qq_sendDataFromTBEToMemory; + s_deallocateTBE; + l_popForwardQueue; + } + + // Transitions from II + transition(II, {Other_GETS, Other_GETX}, II) { + f_sendAck; + l_popForwardQueue; + } + + transition(II, Writeback_Ack, I) { + g_sendUnblock; + s_deallocateTBE; + l_popForwardQueue; + } + + transition(II, Writeback_Nack, I) { + s_deallocateTBE; + l_popForwardQueue; + } +} + diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm new file mode 100644 index 000000000..b9b001e40 --- /dev/null +++ b/src/mem/protocol/MOESI_hammer-dir.sm @@ -0,0 +1,920 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 2009 Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * AMD's contributions to the MOESI hammer protocol do not constitute an + * endorsement of its similarity to any AMD products. + * + * Authors: Milo Martin + * Brad Beckmann + */ + +machine(Directory, "AMD Hammer-like protocol") +: int memory_controller_latency +{ + + MessageBuffer forwardFromDir, network="To", virtual_network="2", ordered="false"; + MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; + // + // For a finite buffered network, note that the DMA response network only + // works at this relatively higher numbered (lower priority) virtual network + // because the trigger queue decouples cache responses from DMA responses. 
+ // + MessageBuffer dmaResponseFromDir, network="To", virtual_network="4", ordered="true"; + + MessageBuffer unblockToDir, network="From", virtual_network="0", ordered="false"; + MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false"; + MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; + MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; + + // STATES + enumeration(State, desc="Directory states", default="Directory_State_E") { + // Base states + NO, desc="Not Owner"; + O, desc="Owner"; + E, desc="Exclusive Owner (we can provide the data in exclusive)"; + NO_B, "NO^B", desc="Not Owner, Blocked"; + O_B, "O^B", desc="Owner, Blocked"; + NO_B_W, desc="Not Owner, Blocked, waiting for Dram"; + O_B_W, desc="Owner, Blocked, waiting for Dram"; + NO_W, desc="Not Owner, waiting for Dram"; + O_W, desc="Owner, waiting for Dram"; + NO_DW_B_W, desc="Not Owner, Dma Write waiting for Dram and cache responses"; + NO_DR_B_W, desc="Not Owner, Dma Read waiting for Dram and cache responses"; + NO_DR_B_D, desc="Not Owner, Dma Read waiting for cache responses including dirty data"; + NO_DR_B, desc="Not Owner, Dma Read waiting for cache responses"; + NO_DW_W, desc="Not Owner, Dma Write waiting for Dram"; + O_DR_B_W, desc="Owner, Dma Read waiting for Dram and cache responses"; + O_DR_B, desc="Owner, Dma Read waiting for cache responses"; + WB, desc="Blocked on a writeback"; + WB_O_W, desc="Blocked on memory write, will go to O"; + WB_E_W, desc="Blocked on memory write, will go to E"; + } + + // Events + enumeration(Event, desc="Directory events") { + GETX, desc="A GETX arrives"; + GETS, desc="A GETS arrives"; + PUT, desc="A PUT arrives"; + Unblock, desc="An unblock message arrives"; + Writeback_Clean, desc="The final part of a PutX (no data)"; + Writeback_Dirty, desc="The final part of a PutX (data)"; + Writeback_Exclusive_Clean, desc="The final part of a PutX (no data, exclusive)"; + Writeback_Exclusive_Dirty, desc="The final part of a PutX (data, exclusive)"; + + // DMA requests + DMA_READ, desc="A DMA Read memory request"; + DMA_WRITE, desc="A DMA Write memory request"; + + // Memory Controller + Memory_Data, desc="Fetched data from memory arrives"; + Memory_Ack, desc="Writeback Ack from memory arrives"; + + // Cache responses required to handle DMA + Ack, desc="Received an ack message"; + Shared_Ack, desc="Received an ack message, responder has a shared copy"; + Shared_Data, desc="Received a data message, responder has a shared copy"; + Exclusive_Data, desc="Received a data message, responder had an exclusive copy, they gave it to us"; + + // Triggers + All_acks_and_data, desc="Received all required data and message acks"; + All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy"; + } + + // TYPES + + // DirectoryEntry + structure(Entry, desc="...") { + State DirectoryState, desc="Directory state"; + DataBlock DataBlk, desc="data for the block"; + } + + external_type(DirectoryMemory) { + Entry lookup(Address); + bool isPresent(Address); + } + + external_type(MemoryControl, inport="yes", outport="yes") { + + } + + // TBE entries for DMA requests + structure(TBE, desc="TBE entries for outstanding DMA requests") { + Address PhysicalAddress, desc="physical address"; + State TBEState, desc="Transient State"; + CoherenceResponseType ResponseType, desc="The type for the subsequent response message"; + DataBlock DmaDataBlk, desc="DMA Data to be written. 
Partial blocks need to be merged with system memory"; + DataBlock DataBlk, desc="The current view of system memory"; + int Len, desc="..."; + MachineID DmaRequestor, desc="DMA requestor"; + int NumPendingMsgs, desc="Number of pending acks/messages"; + bool CacheDirty, desc="Indicates whether a cache has responded with dirty data"; + bool Sharers, desc="Indicates whether a cache has indicated it is currently a sharer"; + } + + external_type(TBETable) { + TBE lookup(Address); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } + + // ** OBJECTS ** + + DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])'; + + MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])'; + + TBETable TBEs, template_hack="<Directory_TBE>"; + + State getState(Address addr) { + if (TBEs.isPresent(addr)) { + return TBEs[addr].TBEState; + } else { + return directory[addr].DirectoryState; + } + } + + void setState(Address addr, State state) { + if (TBEs.isPresent(addr)) { + TBEs[addr].TBEState := state; + } + directory[addr].DirectoryState := state; + } + + MessageBuffer triggerQueue, ordered="true"; + + // ** OUT_PORTS ** + out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests + out_port(forwardNetwork_out, RequestMsg, forwardFromDir); + out_port(responseNetwork_out, ResponseMsg, responseFromDir); + out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaResponseFromDir); + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + + // + // Memory buffer for memory controller to DIMM communication + // + out_port(memQueue_out, MemoryMsg, memBuffer); + + // ** IN_PORTS ** + + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, triggerQueue) { + if (triggerQueue_in.isReady()) { + peek(triggerQueue_in, TriggerMsg) { + if (in_msg.Type == TriggerType:ALL_ACKS) { + trigger(Event:All_acks_and_data, in_msg.Address); + } else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) { + trigger(Event:All_acks_and_data_no_sharers, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + + in_port(unblockNetwork_in, ResponseMsg, unblockToDir) { + if (unblockNetwork_in.isReady()) { + peek(unblockNetwork_in, ResponseMsg) { + if (in_msg.Type == CoherenceResponseType:UNBLOCK) { + trigger(Event:Unblock, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:WB_CLEAN) { + trigger(Event:Writeback_Clean, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:WB_DIRTY) { + trigger(Event:Writeback_Dirty, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:WB_EXCLUSIVE_CLEAN) { + trigger(Event:Writeback_Exclusive_Clean, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:WB_EXCLUSIVE_DIRTY) { + trigger(Event:Writeback_Exclusive_Dirty, in_msg.Address); + } else { + error("Invalid message"); + } + } + } + } + + // Response Network + in_port(responseToDir_in, ResponseMsg, responseToDir) { + if (responseToDir_in.isReady()) { + peek(responseToDir_in, ResponseMsg) { + if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_SHARED) { + trigger(Event:Shared_Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA_SHARED) { + trigger(Event:Shared_Data, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) { + trigger(Event:Exclusive_Data, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + + 
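Note the getState/setState pair above: while a TBE is allocated for an address, its transient state shadows the stable directory state, but setState still writes the directory entry unconditionally so the stable state is already correct when the TBE is deallocated. A small Python model of the same lookup pattern (an illustrative sketch; the class, dictionaries, and string states are invented for this example):

    class DirectoryState(object):
        def __init__(self):
            self.tbes = {}        # addr -> transient state, present only mid-transaction
            self.directory = {}   # addr -> stable state; the protocol default is "E"

        def get_state(self, addr):
            # an allocated TBE shadows the stable directory entry
            if addr in self.tbes:
                return self.tbes[addr]
            return self.directory.get(addr, "E")

        def set_state(self, addr, state):
            # update the TBE if present, but always write the directory entry
            # so the stable state is correct once the TBE is freed
            if addr in self.tbes:
                self.tbes[addr] = state
            self.directory[addr] = state

    d = DirectoryState()
    d.set_state(0x40, "NO_B")    # no TBE yet: the stable state is updated directly
    d.tbes[0x40] = "NO_DR_B_W"   # allocate a TBE; it now shadows the entry
    print(d.get_state(0x40))     # -> NO_DR_B_W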
in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, DMARequestMsg) { + if (in_msg.Type == DMARequestType:READ) { + trigger(Event:DMA_READ, in_msg.LineAddress); + } else if (in_msg.Type == DMARequestType:WRITE) { + trigger(Event:DMA_WRITE, in_msg.LineAddress); + } else { + error("Invalid message"); + } + } + } + } + + in_port(requestQueue_in, RequestMsg, requestToDir) { + if (requestQueue_in.isReady()) { + peek(requestQueue_in, RequestMsg) { + if (in_msg.Type == CoherenceRequestType:GETS) { + trigger(Event:GETS, in_msg.Address); + } else if (in_msg.Type == CoherenceRequestType:GETX) { + trigger(Event:GETX, in_msg.Address); + } else if (in_msg.Type == CoherenceRequestType:PUT) { + trigger(Event:PUT, in_msg.Address); + } else { + error("Invalid message"); + } + } + } + } + + // off-chip memory request/response is done + in_port(memQueue_in, MemoryMsg, memBuffer) { + if (memQueue_in.isReady()) { + peek(memQueue_in, MemoryMsg) { + if (in_msg.Type == MemoryRequestType:MEMORY_READ) { + trigger(Event:Memory_Data, in_msg.Address); + } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) { + trigger(Event:Memory_Ack, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } + } + } + + // Actions + + action(a_sendWriteBackAck, "a", desc="Send writeback ack to requestor") { + peek(requestQueue_in, RequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:WB_ACK; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") { + peek(requestQueue_in, RequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:WB_NACK; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(v_allocateTBE, "v", desc="Allocate TBE") { + peek(requestQueue_in, RequestMsg) { + TBEs.allocate(address); + TBEs[address].PhysicalAddress := address; + TBEs[address].ResponseType := CoherenceResponseType:NULL; + } + } + + action(vd_allocateDmaRequestInTBE, "vd", desc="Record Data in TBE") { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + TBEs[address].DmaDataBlk := in_msg.DataBlk; + TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; + TBEs[address].Len := in_msg.Len; + TBEs[address].DmaRequestor := in_msg.Requestor; + TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; + // + // One ack for each last-level cache + // + TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); + // + // Assume initially that the caches store a clean copy and that memory + // will provide the data + // + TBEs[address].CacheDirty := false; + } + } + + action(w_deallocateTBE, "w", desc="Deallocate TBE") { + TBEs.deallocate(address); + } + + action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") { + peek(responseToDir_in, ResponseMsg) { + assert(in_msg.Acks > 0); + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + // + // Note that cache data responses will have an ack count of 2. 
However, + // directory DMA requests must wait for acks from all LLC caches, so + // only decrement by 1. + // + TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - 1; + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + } + } + + action(n_popResponseQueue, "n", desc="Pop response queue") { + responseToDir_in.dequeue(); + } + + action(o_checkForCompletion, "o", desc="Check if we have received all the messages required for completion") { + if (TBEs[address].NumPendingMsgs == 0) { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.Address := address; + if (TBEs[address].Sharers) { + out_msg.Type := TriggerType:ALL_ACKS; + } else { + out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS; + } + } + } + } + + action(d_sendData, "d", desc="Send data to requestor") { + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := TBEs[address].ResponseType; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.OriginalRequestorMachId); + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Dirty := false; // By definition, the block is now clean + out_msg.Acks := 1; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(dr_sendDmaData, "dr", desc="Send Data to DMA controller from memory") { + peek(memQueue_in, MemoryMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(dt_sendDmaDataFromTbe, "dt", desc="Send Data to DMA controller from tbe") { + peek(triggerQueue_in, TriggerMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(da_sendDmaAck, "da", desc="Send Ack to DMA controller") { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:ACK; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + + action(rx_recordExclusiveInTBE, "rx", desc="Record Exclusive in TBE") { + peek(requestQueue_in, RequestMsg) { + TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; + } + } + + action(r_recordDataInTBE, "rt", desc="Record Data in TBE") { + peek(requestQueue_in, RequestMsg) { + TBEs[address].ResponseType := CoherenceResponseType:DATA; + } + } + + action(r_setSharerBit, "r", desc="We saw other sharers") { + TBEs[address].Sharers := true; + } + + action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { + peek(requestQueue_in, RequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := 
in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } + + action(qd_queueMemoryRequestFromDmaRead, "qd", desc="Queue off-chip fetch request") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } + + action(f_forwardRequest, "f", desc="Forward requests") { + if (getNumberOfLastLevelCaches() > 1) { + peek(requestQueue_in, RequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := in_msg.Type; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches + out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + } + } + } + } + + action(f_forwardWriteFromDma, "fw", desc="Forward requests") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETX; + // + // Send to all L1 caches, since the requestor is the memory controller + // itself + // + out_msg.Requestor := machineID; + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + } + } + } + + action(f_forwardReadFromDma, "fr", desc="Forward requests") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETS; + // + // Send to all L1 caches, since the requestor is the memory controller + // itself + // + out_msg.Requestor := machineID; + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + } + } + } + + action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") { + requestQueue_in.dequeue(); + } + + action(j_popIncomingUnblockQueue, "j", desc="Pop incoming unblock queue") { + unblockNetwork_in.dequeue(); + } + + action(l_popMemQueue, "q", desc="Pop off-chip request queue") { + memQueue_in.dequeue(); + } + + action(g_popTriggerQueue, "g", desc="Pop trigger queue") { + triggerQueue_in.dequeue(); + } + + action(p_popDmaRequestQueue, "pd", desc="pop dma request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(y_recycleDmaRequestQueue, "y", desc="recycle dma request queue") { + dmaRequestQueue_in.recycle(); + } + + action(r_recordMemoryData, "rd", desc="record data from memory to TBE") { + peek(memQueue_in, MemoryMsg) { + if (TBEs[address].CacheDirty == false) { + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + } + + action(r_recordCacheData, "rc", desc="record data from cache response to TBE") { + peek(responseToDir_in, ResponseMsg) { + TBEs[address].CacheDirty := true; + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + + action(l_writeDataToMemory, "l", desc="Write PUTX/PUTO data to memory") { + peek(unblockNetwork_in, ResponseMsg) { + assert(in_msg.Dirty); + assert(in_msg.MessageSize == MessageSizeType:Writeback_Data); + directory[address].DataBlk := in_msg.DataBlk; + DEBUG_EXPR(in_msg.Address); + 
DEBUG_EXPR(in_msg.DataBlk); + } + } + + action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") { + directory[address].DataBlk := TBEs[address].DataBlk; + directory[address].DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + } + + action(a_assertCacheData, "ac", desc="Assert that a cache provided the data") { + assert(TBEs[address].CacheDirty); + } + + action(l_queueMemoryWBRequest, "lq", desc="Write PUTX data to memory") { + peek(unblockNetwork_in, ResponseMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + DEBUG_EXPR(out_msg); + } + } + } + + action(ld_queueMemoryDmaWrite, "ld", desc="Write DMA data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + // first, initialize the data blk to the current version of system memory + out_msg.DataBlk := TBEs[address].DataBlk; + // then add the dma write data + out_msg.DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + DEBUG_EXPR(out_msg); + } + } + + action(ll_checkIncomingWriteback, "\l", desc="Check PUTX/PUTO response message") { + peek(unblockNetwork_in, ResponseMsg) { + assert(in_msg.Dirty == false); + assert(in_msg.MessageSize == MessageSizeType:Writeback_Control); + + // NOTE: The following check would not be valid in a real + // implementation. We include the data in the "dataless" + // message so we can assert the clean data matches the datablock + // in memory + assert(directory[address].DataBlk == in_msg.DataBlk); + } + } + + action(zz_recycleRequest, "\z", desc="Recycle the request queue") { + requestQueue_in.recycle(); + } + + // TRANSITIONS + + // Transitions out of E state + transition(E, GETX, NO_B_W) { + v_allocateTBE; + rx_recordExclusiveInTBE; + qf_queueMemoryFetchRequest; + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(E, GETS, NO_B_W) { + v_allocateTBE; + rx_recordExclusiveInTBE; + qf_queueMemoryFetchRequest; + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(E, DMA_READ, NO_DR_B_W) { + vd_allocateDmaRequestInTBE; + qd_queueMemoryRequestFromDmaRead; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + // Transitions out of O state + transition(O, GETX, NO_B_W) { + v_allocateTBE; + r_recordDataInTBE; + qf_queueMemoryFetchRequest; + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(O, GETS, O_B_W) { + v_allocateTBE; + r_recordDataInTBE; + qf_queueMemoryFetchRequest; + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(O, DMA_READ, O_DR_B_W) { + vd_allocateDmaRequestInTBE; + qd_queueMemoryRequestFromDmaRead; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + transition({E, O, NO}, DMA_WRITE, NO_DW_B_W) { + vd_allocateDmaRequestInTBE; + f_forwardWriteFromDma; + p_popDmaRequestQueue; + } + + // Transitions out of NO state + transition(NO, GETX, NO_B) { + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(NO, GETS, NO_B) { + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(NO, PUT, WB) { + a_sendWriteBackAck; + i_popIncomingRequestQueue; + } + + transition(NO, DMA_READ, NO_DR_B_D) { + vd_allocateDmaRequestInTBE; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + // Nack PUT requests when races cause us to believe we own the data + transition({O, E}, PUT) { + b_sendWriteBackNack; + 
i_popIncomingRequestQueue; + } + + // Blocked transient states + transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, + NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, + NO_W, O_W, WB, WB_E_W, WB_O_W}, + {GETS, GETX, PUT}) { + zz_recycleRequest; + } + + transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, + NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, + NO_W, O_W, WB, WB_E_W, WB_O_W}, + {DMA_READ, DMA_WRITE}) { + y_recycleDmaRequestQueue; + } + + transition(NO_B, Unblock, NO) { + j_popIncomingUnblockQueue; + } + + transition(O_B, Unblock, O) { + j_popIncomingUnblockQueue; + } + + transition(NO_B_W, Memory_Data, NO_B) { + d_sendData; + w_deallocateTBE; + l_popMemQueue; + } + + transition(NO_DR_B_W, Memory_Data, NO_DR_B) { + r_recordMemoryData; + o_checkForCompletion; + l_popMemQueue; + } + + transition(O_DR_B_W, Memory_Data, O_DR_B) { + r_recordMemoryData; + dr_sendDmaData; + o_checkForCompletion; + l_popMemQueue; + } + + transition({NO_DR_B, O_DR_B, NO_DR_B_D, NO_DW_B_W}, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Ack) { + m_decrementNumberOfMessages; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D}, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Shared_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D}, Shared_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Exclusive_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D, NO_DW_B_W}, Exclusive_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B, All_acks_and_data, O) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B_D, All_acks_and_data, O) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(O_DR_B, All_acks_and_data_no_sharers, O) { + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B, All_acks_and_data_no_sharers, E) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B_D, All_acks_and_data_no_sharers, E) { + a_assertCacheData; + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. 
+ // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DW_B_W, All_acks_and_data_no_sharers, NO_DW_W) { + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWrite; + g_popTriggerQueue; + } + + transition(NO_DW_W, Memory_Ack, E) { + da_sendDmaAck; + w_deallocateTBE; + l_popMemQueue; + } + + transition(O_B_W, Memory_Data, O_B) { + d_sendData; + w_deallocateTBE; + l_popMemQueue; + } + + transition(NO_B_W, Unblock, NO_W) { + j_popIncomingUnblockQueue; + } + + transition(O_B_W, Unblock, O_W) { + j_popIncomingUnblockQueue; + } + + transition(NO_W, Memory_Data, NO) { + w_deallocateTBE; + l_popMemQueue; + } + + transition(O_W, Memory_Data, O) { + w_deallocateTBE; + l_popMemQueue; + } + + // WB State Transitions + transition(WB, Writeback_Dirty, WB_O_W) { + l_writeDataToMemory; + l_queueMemoryWBRequest; + j_popIncomingUnblockQueue; + } + + transition(WB, Writeback_Exclusive_Dirty, WB_E_W) { + l_writeDataToMemory; + l_queueMemoryWBRequest; + j_popIncomingUnblockQueue; + } + + transition(WB_E_W, Memory_Ack, E) { + l_popMemQueue; + } + + transition(WB_O_W, Memory_Ack, O) { + l_popMemQueue; + } + + transition(WB, Writeback_Clean, O) { + ll_checkIncomingWriteback; + j_popIncomingUnblockQueue; + } + + transition(WB, Writeback_Exclusive_Clean, E) { + ll_checkIncomingWriteback; + j_popIncomingUnblockQueue; + } + + transition(WB, Unblock, NO) { + j_popIncomingUnblockQueue; + } +} diff --git a/src/mem/protocol/MOESI_hammer-dma.sm b/src/mem/protocol/MOESI_hammer-dma.sm new file mode 100644 index 000000000..b217923a4 --- /dev/null +++ b/src/mem/protocol/MOESI_hammer-dma.sm @@ -0,0 +1,165 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +machine(DMA, "DMA Controller") +: int request_latency +{ + + MessageBuffer responseFromDir, network="From", virtual_network="4", ordered="true", no_vector="true"; + MessageBuffer reqToDirectory, network="To", virtual_network="5", ordered="false", no_vector="true"; + + enumeration(State, desc="DMA states", default="DMA_State_READY") { + READY, desc="Ready to accept a new request"; + BUSY_RD, desc="Busy: currently processing a request"; + BUSY_WR, desc="Busy: currently processing a request"; + } + + enumeration(Event, desc="DMA events") { + ReadRequest, desc="A new read request"; + WriteRequest, desc="A new write request"; + Data, desc="Data from a DMA memory read"; + Ack, desc="DMA write to memory completed"; + } + + external_type(DMASequencer) { + void ackCallback(); + void dataCallback(DataBlock); + } + + MessageBuffer mandatoryQueue, ordered="false", no_vector="true"; + DMASequencer dma_sequencer, factory='RubySystem::getDMASequencer(m_cfg["dma_sequencer"])', no_vector="true"; + State cur_state, no_vector="true"; + + State getState(Address addr) { + return cur_state; + } + void setState(Address addr, State state) { + cur_state := state; + } + + out_port(reqToDirectory_out, DMARequestMsg, reqToDirectory, desc="..."); + + in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, SequencerMsg) { + if (in_msg.Type == SequencerRequestType:LD ) { + trigger(Event:ReadRequest, in_msg.LineAddress); + } else if (in_msg.Type == SequencerRequestType:ST) { + trigger(Event:WriteRequest, in_msg.LineAddress); + } else { + error("Invalid request type"); + } + } + } + } + + in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") { + if (dmaResponseQueue_in.isReady()) { + peek( dmaResponseQueue_in, DMAResponseMsg) { + if (in_msg.Type == DMAResponseType:ACK) { + trigger(Event:Ack, in_msg.LineAddress); + } else if (in_msg.Type == DMAResponseType:DATA) { + trigger(Event:Data, in_msg.LineAddress); + } else { + error("Invalid response type"); + } + } + } + } + + action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:READ; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:WRITE; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.ackCallback(); + } + } + + action(d_dataCallback, "d", desc="Write data to dma sequencer") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.dataCallback(in_msg.DataBlk); + 
} + } + + action(p_popRequestQueue, "p", desc="Pop request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(p_popResponseQueue, "\p", desc="Pop response queue") { + dmaResponseQueue_in.dequeue(); + } + + transition(READY, ReadRequest, BUSY_RD) { + s_sendReadRequest; + p_popRequestQueue; + } + + transition(READY, WriteRequest, BUSY_WR) { + s_sendWriteRequest; + p_popRequestQueue; + } + + transition(BUSY_RD, Data, READY) { + d_dataCallback; + p_popResponseQueue; + } + + transition(BUSY_WR, Ack, READY) { + a_ackCallback; + p_popResponseQueue; + } +} diff --git a/src/mem/protocol/MOESI_hammer-msg.sm b/src/mem/protocol/MOESI_hammer-msg.sm new file mode 100644 index 000000000..5d8226eb6 --- /dev/null +++ b/src/mem/protocol/MOESI_hammer-msg.sm @@ -0,0 +1,119 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * AMD's contributions to the MOESI hammer protocol do not constitute an + * endorsement of its similarity to any AMD products. + */ + +// CoherenceRequestType +enumeration(CoherenceRequestType, desc="...") { + GETX, desc="Get eXclusive"; + GETS, desc="Get Shared"; + PUT, desc="Put Ownership"; + WB_ACK, desc="Writeback ack"; + WB_NACK, desc="Writeback neg. 
ack"; +} + +// CoherenceResponseType +enumeration(CoherenceResponseType, desc="...") { + ACK, desc="ACKnowledgment, responder does not have a copy"; + ACK_SHARED, desc="ACKnowledgment, responder has a shared copy"; + DATA, desc="Data, responder does not have a copy"; + DATA_SHARED, desc="Data, responder has a shared copy"; + DATA_EXCLUSIVE, desc="Data, responder was exclusive, gave us a copy, and they went to invalid"; + WB_CLEAN, desc="Clean writeback"; + WB_DIRTY, desc="Dirty writeback"; + WB_EXCLUSIVE_CLEAN, desc="Clean writeback of exclusive data"; + WB_EXCLUSIVE_DIRTY, desc="Dirty writeback of exclusive data"; + UNBLOCK, desc="Unblock"; + NULL, desc="Null value"; +} + +// TriggerType +enumeration(TriggerType, desc="...") { + ALL_ACKS, desc="See corresponding event"; + ALL_ACKS_NO_SHARERS, desc="See corresponding event"; +} + +// TriggerMsg +structure(TriggerMsg, desc="...", interface="Message") { + Address Address, desc="Physical address for this request"; + TriggerType Type, desc="Type of trigger"; +} + +// RequestMsg (and also forwarded requests) +structure(RequestMsg, desc="...", interface="NetworkMessage") { + Address Address, desc="Physical address for this request"; + CoherenceRequestType Type, desc="Type of request (GetS, GetX, PutX, etc)"; + MachineID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Multicast destination mask"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +// ResponseMsg (and also unblock requests) +structure(ResponseMsg, desc="...", interface="NetworkMessage") { + Address Address, desc="Physical address for this request"; + CoherenceResponseType Type, desc="Type of response (Ack, Data, etc)"; + MachineID Sender, desc="Node who sent the data"; + NetDest Destination, desc="Node to whom the data is sent"; + DataBlock DataBlk, desc="data for the cache line"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + int Acks, desc="How many messages this counts as"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") { + READ, desc="Memory Read"; + WRITE, desc="Memory Write"; + NULL, desc="Invalid"; +} + +enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") { + DATA, desc="DATA read"; + ACK, desc="ACK write"; + NULL, desc="Invalid"; +} + +structure(DMARequestMsg, desc="...", interface="NetworkMessage") { + DMARequestType Type, desc="Request type (read/write)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + MachineID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + int Len, desc="The length of the request"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +structure(DMAResponseMsg, desc="...", interface="NetworkMessage") { + DMAResponseType Type, desc="Response type (DATA/ACK)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + MessageSizeType MessageSize, desc="size category of the message"; +} diff --git a/src/mem/protocol/MOESI_hammer.slicc b/src/mem/protocol/MOESI_hammer.slicc new file mode 100644 index 000000000..31ad47c2e --- /dev/null +++ b/src/mem/protocol/MOESI_hammer.slicc @@ -0,0 
diff --git a/src/mem/protocol/MOESI_hammer.slicc b/src/mem/protocol/MOESI_hammer.slicc
new file mode 100644
index 000000000..31ad47c2e
--- /dev/null
+++ b/src/mem/protocol/MOESI_hammer.slicc
@@ -0,0 +1,5 @@
+MOESI_hammer-msg.sm
+MOESI_hammer-cache.sm
+MOESI_hammer-dir.sm
+MOESI_hammer-dma.sm
+standard_1level_CMP-protocol.sm
diff --git a/src/mem/protocol/RubySlicc_ComponentMapping.sm b/src/mem/protocol/RubySlicc_ComponentMapping.sm
index 0da1a05e2..891820c46 100644
--- a/src/mem/protocol/RubySlicc_ComponentMapping.sm
+++ b/src/mem/protocol/RubySlicc_ComponentMapping.sm
@@ -29,6 +29,8 @@
 // Mapping functions
+int getNumberOfLastLevelCaches();
+
 // NodeID map_address_to_node(Address addr);
 MachineID mapAddressToRange(Address addr, MachineType type, int low, int high);
 NetDest broadcast(MachineType type);
diff --git a/src/mem/protocol/RubySlicc_Util.sm b/src/mem/protocol/RubySlicc_Util.sm
index 312682bd7..e1771448f 100644
--- a/src/mem/protocol/RubySlicc_Util.sm
+++ b/src/mem/protocol/RubySlicc_Util.sm
@@ -52,7 +52,6 @@ void dirProfileCoherenceRequest(NodeID node, bool needCLB);
 bool isPerfectProtocol();
 bool L1trainsPrefetcher();
 int max_tokens();
-int N_tokens();
 bool distributedPersistentEnabled();
 Address setOffset(Address addr, int offset);
 Address makeLineAddress(Address addr);
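A .slicc protocol description like MOESI_hammer.slicc above is just a newline-separated list of state-machine files; the SConscript changes below register a scanner that turns each listed file into a build dependency. A standalone approximation of that convention, not the SCons scanner itself, and ignoring blank lines for simplicity:

    def read_slicc_list(path):
        """Return the .sm files named in a .slicc protocol description."""
        with open(path) as f:
            return [line.strip() for line in f if line.strip()]

    # read_slicc_list("MOESI_hammer.slicc") would return the five files
    # listed above, starting with 'MOESI_hammer-msg.sm'.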
diff --git a/src/mem/protocol/SConscript b/src/mem/protocol/SConscript
index 293346f13..cd9920d22 100644
--- a/src/mem/protocol/SConscript
+++ b/src/mem/protocol/SConscript
@@ -29,30 +29,51 @@
 # Authors: Nathan Binkert
 
 import os
-import re
-import string
 import sys
-from os.path import basename, dirname, exists, expanduser, isdir, isfile
-from os.path import join as joinpath
-
-import SCons
+from os.path import isdir, isfile, join as joinpath
 
 Import('*')
 
 if not env['RUBY']:
     Return()
 
-slicc_dir = Dir('../slicc')
 protocol_dir = Dir('.')
 html_dir = Dir('html')
+slicc_dir = Dir('../slicc')
+
+sys.path[1:1] = [ Dir('..').srcnode().abspath ]
+from slicc.parser import SLICC
+
+slicc_depends = []
+for root,dirs,files in os.walk(slicc_dir.srcnode().abspath):
+    for f in files:
+        if f.endswith('.py'):
+            slicc_depends.append(File(joinpath(root, f)))
 
 #
 # Use SLICC
 #
-def slicc_generator(target, source, env, for_signature):
-    slicc_bin = str(source[0])
-    protocol = source[1].get_contents()
+
+def slicc_scanner(node, env, path):
+    contents = node.get_contents()
+    files = [ line.strip() for line in contents.splitlines() ]
+    return files
+
+env.Append(SCANNERS=Scanner(function=slicc_scanner,skeys=['.slicc']))
+
+def slicc_emitter(target, source, env):
+    files = [s.srcnode().abspath for s in source[1:]]
+    slicc = SLICC(debug=True)
+    print "SLICC parsing..."
+    for name in slicc.load(files, verbose=True):
+        print "    %s" % name
+
+    target.extend(sorted(slicc.files()))
+    return target, source
+
+def slicc_action(target, source, env):
+    protocol = source[0].get_contents()
 
     pdir = str(protocol_dir)
     hdir = str(html_dir)
@@ -61,32 +82,31 @@ def slicc_generator(target, source, env, for_signature):
     if not isdir(hdir):
         os.mkdir(hdir)
 
-    do_html = "html"
-    cmdline = [ slicc_bin, pdir, hdir, protocol, do_html ]
-    cmdline += [ str(s) for s in source[2:] ]
-    cmdline = ' '.join(cmdline)
-    return cmdline
+    slicc = SLICC(debug=True)
+    files = [str(s) for s in source[1:]]
+    slicc.load(files, verbose=False)
 
-slicc_builder = Builder(generator=slicc_generator)
+    print "SLICC Generator pass 1..."
+    slicc.findMachines()
 
-protocol = env['PROTOCOL']
-sources = [ protocol_dir.File("RubySlicc_interfaces.slicc"),
-            protocol_dir.File("%s.slicc" % protocol) ]
+    print "SLICC Generator pass 2..."
+    slicc.generate()
 
-sys.path[0:0] = [env['ENV']['M5_PLY']]
-execfile(slicc_dir.File('parser/parser.py').srcnode().abspath)
+    print "SLICC writing C++ files..."
+    slicc.writeCodeFiles(pdir)
 
-sm_files = read_slicc([s.srcnode().abspath for s in sources])
-sm_files = [ protocol_dir.File(f) for f in sm_files ]
+    print "SLICC writing HTML files..."
+    slicc.writeHTMLFiles(hdir)
 
-hh, cc = scan([s.srcnode().abspath for s in sm_files])
-hh = [ protocol_dir.File(f) for f in hh ]
-cc = [ protocol_dir.File(f) for f in cc ]
+slicc_builder = Builder(action=slicc_action, emitter=slicc_emitter)
 
-slicc_bin = slicc_dir.File("slicc")
+protocol = env['PROTOCOL']
+sources = [ protocol_dir.File("RubySlicc_interfaces.slicc"),
+            protocol_dir.File("%s.slicc" % protocol) ]
 
 env.Append(BUILDERS={'SLICC' : slicc_builder})
-env.SLICC(hh + cc, [ slicc_bin, Value(protocol) ] + sm_files)
+nodes = env.SLICC([], [ Value(protocol) ] + sources)
+env.Depends(nodes, slicc_depends)
 
-for f in cc:
+for f in sorted(s for s in nodes if str(s).endswith('.cc')):
     Source(f)
diff --git a/src/mem/protocol/SConsopts b/src/mem/protocol/SConsopts
index ded0814d2..10a303681 100644
--- a/src/mem/protocol/SConsopts
+++ b/src/mem/protocol/SConsopts
@@ -47,9 +47,10 @@ all_protocols = [
     'MOSI_SMP_bcast_m',
     'MOSI_SMP_directory_1level',
     'MSI_MOSI_CMP_directory',
+    'MOESI_hammer',
     ]
 
-opt = EnumVariable('PROTOCOL', 'Coherence Protocol for Ruby', 'MI_example',
+opt = EnumVariable('PROTOCOL', 'Coherence Protocol for Ruby', 'MOESI_CMP_directory',
                    all_protocols)
 sticky_vars.AddVariables(opt)
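PROTOCOL is a sticky SCons build variable, so the EnumVariable above both advertises the legal protocol names and rejects anything else at configure time. A minimal standalone sketch of that mechanism follows; it assumes SCons is installed, and the all_protocols list here is truncated to three names for illustration.

    from SCons.Variables import Variables, EnumVariable
    from SCons.Environment import Environment

    # Truncated for illustration; the real list is in SConsopts above.
    all_protocols = ['MI_example', 'MOESI_CMP_directory', 'MOESI_hammer']

    vars = Variables()
    vars.AddVariables(EnumVariable('PROTOCOL', 'Coherence Protocol for Ruby',
                                   'MOESI_CMP_directory', all_protocols))
    env = Environment(variables=vars)
    print(env['PROTOCOL'])   # 'MOESI_CMP_directory' unless overridden, e.g.
                             # by PROTOCOL=MOESI_hammer on the command line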