diff options
author | Brad Beckmann <Brad.Beckmann@amd.com> | 2009-11-18 16:34:33 -0800 |
---|---|---|
committer | Brad Beckmann <Brad.Beckmann@amd.com> | 2009-11-18 16:34:33 -0800 |
commit | 5d8a669539a142ece820cf0c82722ea1c755d7cd (patch) | |
tree | b17fb03643674b83785859378296ed4690e3d715 /src/mem/protocol | |
parent | c6182199c54a874315a97338879d5bfaf3895a6f (diff) | |
download | gem5-5d8a669539a142ece820cf0c82722ea1c755d7cd.tar.xz |
Resurrection of the CMP token protocol to GEM5
Diffstat (limited to 'src/mem/protocol')
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-L1cache.sm | 353 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-L2cache.sm | 174 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-dir.sm | 866 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-dma.sm | 165 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-msg.sm | 54 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token.slicc | 1 | ||||
-rw-r--r-- | src/mem/protocol/RubySlicc_Util.sm | 1 |
7 files changed, 1335 insertions, 279 deletions
diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm index ab58c5c00..3fb4a8862 100644 --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -32,21 +32,32 @@ * */ -machine(L1Cache, "Token protocol") { +machine(L1Cache, "Token protocol") + : int l1_request_latency, + int l1_response_latency, + int l2_select_low_bit, + int l2_select_num_bits, + int N_tokens, + int retry_threshold, + int fixed_timeout_latency, + bool dynamic_timeout_enabled +{ // From this node's L1 cache TO the network - // a local L1 -> this L2 bank, currently ordered with directory forwarded requests - MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false"; + // a local L1 -> this L2 bank - MessageBuffer responseFromL1Cache, network="To", virtual_network="2", ordered="false"; - MessageBuffer persistentFromL1Cache, network="To", virtual_network="3", ordered="true"; + MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false"; + MessageBuffer persistentFromL1Cache, network="To", virtual_network="2", ordered="true"; + // a local L1 -> this L2 bank, currently ordered with directory forwarded requests + MessageBuffer requestFromL1Cache, network="To", virtual_network="4", ordered="false"; + // To this node's L1 cache FROM the network // a L2 bank -> this L1 - MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false"; + MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false"; + MessageBuffer persistentToL1Cache, network="From", virtual_network="2", ordered="true"; // a L2 bank -> this L1 - MessageBuffer responseToL1Cache, network="From", virtual_network="2", ordered="false"; - MessageBuffer persistentToL1Cache, network="From", virtual_network="3", ordered="true"; + MessageBuffer requestToL1Cache, network="From", virtual_network="4", ordered="false"; // STATES enumeration(State, desc="Cache states", default="L1Cache_State_I") { @@ -111,10 +122,6 @@ machine(L1Cache, "Token protocol") { // TYPES - int getRetryThreshold(); - int getFixedTimeoutLatency(); - bool getDynamicTimeoutEnabled(); - // CacheEntry structure(Entry, desc="...", interface="AbstractCacheEntry") { State CacheState, desc="cache state"; @@ -143,7 +150,7 @@ machine(L1Cache, "Token protocol") { external_type(CacheMemory) { bool cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); @@ -157,17 +164,28 @@ machine(L1Cache, "Token protocol") { bool isPresent(Address); } + external_type(PersistentTable) { + void persistentRequestLock(Address, MachineID, AccessType); + void persistentRequestUnlock(Address, MachineID); + bool okToIssueStarving(Address, MachineID); + MachineID findSmallest(Address); + AccessType typeOfSmallest(Address); + void markEntries(Address); + bool isLocked(Address); + int countStarvingForAddress(Address); + int countReadStarvingForAddress(Address); + } TBETable L1_TBEs, template_hack="<L1Cache_TBE>"; - CacheMemory L1IcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1I"', abstract_chip_ptr="true"; - CacheMemory L1DcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1D"', abstract_chip_ptr="true"; + CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])'; + CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])'; MessageBuffer mandatoryQueue, ordered="false", abstract_chip_ptr="true"; - Sequencer sequencer, abstract_chip_ptr="true", constructor_hack="i"; + Sequencer sequencer, factory='RubySystem::getSequencer(m_cfg["sequencer"])'; bool starving, default="false"; - PersistentTable persistentTable, constructor_hack="i"; + PersistentTable persistentTable; TimerTable useTimerTable; TimerTable reissueTimerTable; @@ -175,11 +193,11 @@ machine(L1Cache, "Token protocol") { int outstandingPersistentRequests, default="0"; int averageLatencyHysteresis, default="(8)"; // Constant that provides hysteresis for calculated the estimated average - int averageLatencyCounter, default="(500 << (*m_L1Cache_averageLatencyHysteresis_vec[i]))"; + int averageLatencyCounter, default="(500 << (*m_L1Cache_averageLatencyHysteresis_ptr))"; int averageLatencyEstimate() { DEBUG_EXPR( (averageLatencyCounter >> averageLatencyHysteresis) ); - profile_average_latency_estimate( (averageLatencyCounter >> averageLatencyHysteresis) ); + //profile_average_latency_estimate( (averageLatencyCounter >> averageLatencyHysteresis) ); return averageLatencyCounter >> averageLatencyHysteresis; } @@ -366,30 +384,33 @@ machine(L1Cache, "Token protocol") { } } - GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) { - if (machineIDToMachineType(sender) == MachineType:L1Cache) { - return GenericMachineType:L1Cache_wCC; // NOTE direct L1 hits should not call this - } else if (machineIDToMachineType(sender) == MachineType:L2Cache) { - if ( sender == (map_L1CacheMachId_to_L2Cache(addr,machineID))) { - return GenericMachineType:L2Cache; - } else { - return GenericMachineType:L2Cache_wCC; - } - } else { - return ConvertMachToGenericMach(machineIDToMachineType(sender)); - } - } - - bool okToIssueStarving(Address addr) { - return persistentTable.okToIssueStarving(addr); +// GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) { +// if (machineIDToMachineType(sender) == MachineType:L1Cache) { +// return GenericMachineType:L1Cache_wCC; // NOTE direct L1 hits should not call this +// } else if (machineIDToMachineType(sender) == MachineType:L2Cache) { +// +// if (sender == (mapAddressToRange(addr, +// MachineType:L2Cache, +// l2_select_low_bit, +// l2_select_num_bits))) { +// +// return GenericMachineType:L2Cache; +// } else { +// return GenericMachineType:L2Cache_wCC; +// } +// } else { +// return ConvertMachToGenericMach(machineIDToMachineType(sender)); +// } +// } + + bool okToIssueStarving(Address addr, MachineID machinID) { + return persistentTable.okToIssueStarving(addr, machineID); } void markPersistentEntries(Address addr) { persistentTable.markEntries(addr); } - MessageBuffer triggerQueue, ordered="false", random="false"; - // ** OUT_PORTS ** out_port(persistentNetwork_out, PersistentMsg, persistentFromL1Cache); out_port(requestNetwork_out, RequestMsg, requestFromL1Cache); @@ -507,7 +528,11 @@ machine(L1Cache, "Token protocol") { // Mark TBE flag if response received off-chip. Use this to update average latency estimate if ( in_msg.SenderMachine == MachineType:L2Cache ) { - if (in_msg.Sender == map_L1CacheMachId_to_L2Cache(in_msg.Address, machineID)) { + if (in_msg.Sender == mapAddressToRange(in_msg.Address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)) { + // came from an off-chip L2 cache if (L1_TBEs.isPresent(in_msg.Address)) { // L1_TBEs[in_msg.Address].ExternalResponse := true; @@ -523,15 +548,15 @@ machine(L1Cache, "Token protocol") { // profile_memory_response( in_msg.Address); } } else if ( in_msg.SenderMachine == MachineType:L1Cache) { - if (isLocalProcessor(machineID, in_msg.Sender) == false) { - if (L1_TBEs.isPresent(in_msg.Address)) { + //if (isLocalProcessor(machineID, in_msg.Sender) == false) { + //if (L1_TBEs.isPresent(in_msg.Address)) { // L1_TBEs[in_msg.Address].ExternalResponse := true; // profile_offchipL1_response(in_msg.Address ); - } - } - else { + //} + //} + //else { // profile_onchipL1_response(in_msg.Address ); - } + //} } else { error("unexpected SenderMachine"); } @@ -570,42 +595,42 @@ machine(L1Cache, "Token protocol") { // ** INSTRUCTION ACCESS *** // Check to see if it is in the OTHER L1 - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.Address); + trigger(Event:L1_Replacement, in_msg.LineAddress); } - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1IcacheMemory.cacheAvail(in_msg.Address)) { + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { // No room in the L1, so we need to make room - trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.LineAddress)); } } } else { // *** DATA ACCESS *** // Check to see if it is in the OTHER L1 - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.Address); + trigger(Event:L1_Replacement, in_msg.LineAddress); } - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1DcacheMemory.cacheAvail(in_msg.Address)) { + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { // No room in the L1, so we need to make room - trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.LineAddress)); } } } @@ -618,19 +643,31 @@ machine(L1Cache, "Token protocol") { action(a_issueReadRequest, "a", desc="Issue GETS") { if (L1_TBEs[address].IssueCount == 0) { // Update outstanding requests - profile_outstanding_request(outstandingRequests); + //profile_outstanding_request(outstandingRequests); outstandingRequests := outstandingRequests + 1; } - if (L1_TBEs[address].IssueCount >= getRetryThreshold() ) { + if (L1_TBEs[address].IssueCount >= retry_threshold) { // Issue a persistent request if possible - if (okToIssueStarving(address) && (starving == false)) { - enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") { + if (okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := PersistentRequestType:GETS_PERSISTENT; out_msg.Requestor := machineID; out_msg.Destination.broadcast(MachineType:L1Cache); - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Persistent_Control; out_msg.Prefetch := L1_TBEs[address].Prefetch; @@ -640,11 +677,11 @@ machine(L1Cache, "Token protocol") { starving := true; if (L1_TBEs[address].IssueCount == 0) { - profile_persistent_prediction(address, L1_TBEs[address].AccessType); + //profile_persistent_prediction(address, L1_TBEs[address].AccessType); } // Update outstanding requests - profile_outstanding_persistent_request(outstandingPersistentRequests); + //profile_outstanding_persistent_request(outstandingPersistentRequests); outstandingPersistentRequests := outstandingPersistentRequests + 1; // Increment IssueCount @@ -666,11 +703,16 @@ machine(L1Cache, "Token protocol") { } } else { // Make a normal request - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.RetryNum := L1_TBEs[address].IssueCount; if (L1_TBEs[address].IssueCount == 0) { out_msg.MessageSize := MessageSizeType:Request_Control; @@ -682,11 +724,18 @@ machine(L1Cache, "Token protocol") { } // send to other local L1s, with local bit set - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; - out_msg.Destination := getOtherLocalL1IDs(machineID); + + // + // Since only one chip, assuming all L1 caches are local + // + //out_msg.Destination := getOtherLocalL1IDs(machineID); + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.remove(machineID); + out_msg.RetryNum := L1_TBEs[address].IssueCount; out_msg.isLocal := true; if (L1_TBEs[address].IssueCount == 0) { @@ -703,10 +752,10 @@ machine(L1Cache, "Token protocol") { // Set a wakeup timer - if (getDynamicTimeoutEnabled()) { + if (dynamic_timeout_enabled) { reissueTimerTable.set(address, 1.25 * averageLatencyEstimate()); } else { - reissueTimerTable.set(address, getFixedTimeoutLatency()); + reissueTimerTable.set(address, fixed_timeout_latency); } } @@ -716,20 +765,32 @@ machine(L1Cache, "Token protocol") { if (L1_TBEs[address].IssueCount == 0) { // Update outstanding requests - profile_outstanding_request(outstandingRequests); + //profile_outstanding_request(outstandingRequests); outstandingRequests := outstandingRequests + 1; } - if (L1_TBEs[address].IssueCount >= getRetryThreshold() ) { + if (L1_TBEs[address].IssueCount >= retry_threshold) { // Issue a persistent request if possible - if ( okToIssueStarving(address) && (starving == false)) { - enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") { + if ( okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := PersistentRequestType:GETX_PERSISTENT; out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.broadcast(MachineType:L1Cache); - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Persistent_Control; out_msg.Prefetch := L1_TBEs[address].Prefetch; @@ -739,11 +800,11 @@ machine(L1Cache, "Token protocol") { starving := true; // Update outstanding requests - profile_outstanding_persistent_request(outstandingPersistentRequests); + //profile_outstanding_persistent_request(outstandingPersistentRequests); outstandingPersistentRequests := outstandingPersistentRequests + 1; if (L1_TBEs[address].IssueCount == 0) { - profile_persistent_prediction(address, L1_TBEs[address].AccessType); + //profile_persistent_prediction(address, L1_TBEs[address].AccessType); } // Increment IssueCount @@ -766,12 +827,17 @@ machine(L1Cache, "Token protocol") { } else { // Make a normal request - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:L1Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.RetryNum := L1_TBEs[address].IssueCount; if (L1_TBEs[address].IssueCount == 0) { @@ -784,12 +850,19 @@ machine(L1Cache, "Token protocol") { } // send to other local L1s too - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; out_msg.isLocal := true; - out_msg.Destination := getOtherLocalL1IDs(machineID); + + // + // Since only one chip, assuming all L1 caches are local + // + //out_msg.Destination := getOtherLocalL1IDs(machineID); + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.remove(machineID); + out_msg.RetryNum := L1_TBEs[address].IssueCount; if (L1_TBEs[address].IssueCount == 0) { out_msg.MessageSize := MessageSizeType:Request_Control; @@ -807,10 +880,10 @@ machine(L1Cache, "Token protocol") { DEBUG_EXPR(L1_TBEs[address].IssueCount); // Set a wakeup timer - if (getDynamicTimeoutEnabled()) { + if (dynamic_timeout_enabled) { reissueTimerTable.set(address, 1.25 * averageLatencyEstimate()); } else { - reissueTimerTable.set(address, getFixedTimeoutLatency()); + reissueTimerTable.set(address, fixed_timeout_latency); } } } @@ -818,7 +891,7 @@ machine(L1Cache, "Token protocol") { action(bb_bounceResponse, "\b", desc="Bounce tokens and data to memory") { peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -833,11 +906,16 @@ machine(L1Cache, "Token protocol") { } action(c_ownedReplacement, "c", desc="Issue writeback") { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Tokens := getCacheEntry(address).Tokens; out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -853,11 +931,16 @@ machine(L1Cache, "Token protocol") { // don't send writeback if replacing block with no tokens if (getCacheEntry(address).Tokens != 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Tokens := getCacheEntry(address).Tokens; out_msg.DataBlk := getCacheEntry(address).DataBlk; // assert(getCacheEntry(address).Dirty == false); @@ -879,7 +962,7 @@ machine(L1Cache, "Token protocol") { action(d_sendDataWithToken, "d", desc="Send data and a token from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -902,14 +985,14 @@ machine(L1Cache, "Token protocol") { action(d_sendDataWithNTokenIfAvail, "\dd", desc="Send data and a token from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - if (getCacheEntry(address).Tokens > N_tokens()) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + if (getCacheEntry(address).Tokens > N_tokens) { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(in_msg.Requestor); - out_msg.Tokens := N_tokens(); + out_msg.Tokens := N_tokens; out_msg.DataBlk := getCacheEntry(address).DataBlk; // out_msg.Dirty := getCacheEntry(address).Dirty; out_msg.Dirty := false; @@ -919,10 +1002,10 @@ machine(L1Cache, "Token protocol") { out_msg.MessageSize := MessageSizeType:Response_Data; } } - getCacheEntry(address).Tokens := getCacheEntry(address).Tokens - N_tokens(); + getCacheEntry(address).Tokens := getCacheEntry(address).Tokens - N_tokens; } else if (getCacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -946,7 +1029,7 @@ machine(L1Cache, "Token protocol") { action(dd_sendDataWithAllTokens, "\d", desc="Send data and all tokens from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -969,7 +1052,7 @@ machine(L1Cache, "Token protocol") { action(e_sendAckWithCollectedTokens, "e", desc="Send ack with the tokens we've collected thus far.") { // assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself if (getCacheEntry(address).Tokens > 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -986,7 +1069,7 @@ machine(L1Cache, "Token protocol") { action(ee_sendDataWithAllTokens, "\e", desc="Send data and all tokens from cache to starver") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getCacheEntry(address).Tokens > 0); - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -1005,23 +1088,23 @@ machine(L1Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getCacheEntry(address).Tokens > 0); if (getCacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(persistentTable.findSmallest(address)); assert(getCacheEntry(address).Tokens >= 1); - if (getCacheEntry(address).Tokens > N_tokens()) { - out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens; } else { out_msg.Tokens := getCacheEntry(address).Tokens - 1; } out_msg.MessageSize := MessageSizeType:Response_Control; } } - if (getCacheEntry(address).Tokens > N_tokens()) { - getCacheEntry(address).Tokens := N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + getCacheEntry(address).Tokens := N_tokens; } else { getCacheEntry(address).Tokens := 1; } @@ -1031,15 +1114,15 @@ machine(L1Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getCacheEntry(address).Tokens > 0); if (getCacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(persistentTable.findSmallest(address)); assert(getCacheEntry(address).Tokens >= 1); - if (getCacheEntry(address).Tokens > N_tokens()) { - out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens; } else { out_msg.Tokens := getCacheEntry(address).Tokens - 1; } @@ -1047,8 +1130,8 @@ machine(L1Cache, "Token protocol") { out_msg.Dirty := getCacheEntry(address).Dirty; out_msg.MessageSize := MessageSizeType:Response_Data; } - if (getCacheEntry(address).Tokens > N_tokens()) { - getCacheEntry(address).Tokens := N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + getCacheEntry(address).Tokens := N_tokens; } else { getCacheEntry(address).Tokens := 1; } @@ -1061,7 +1144,7 @@ machine(L1Cache, "Token protocol") { peek(responseNetwork_in, ResponseMsg) { // assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -1079,7 +1162,8 @@ machine(L1Cache, "Token protocol") { action(h_load_hit, "h", desc="Notify sequencer the load completed.") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); - sequencer.readCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + //sequencer.readCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + sequencer.readCallback(address, getCacheEntry(address).DataBlk); } action(x_external_load_hit, "x", desc="Notify sequencer the load completed.") { @@ -1087,14 +1171,16 @@ machine(L1Cache, "Token protocol") { DEBUG_EXPR(getCacheEntry(address).DataBlk); peek(responseNetwork_in, ResponseMsg) { - sequencer.readCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + //sequencer.readCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + sequencer.readCallback(address, getCacheEntry(address).DataBlk); } } action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); - sequencer.writeCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + //sequencer.writeCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + sequencer.writeCallback(address, getCacheEntry(address).DataBlk); getCacheEntry(address).Dirty := true; DEBUG_EXPR(getCacheEntry(address).DataBlk); } @@ -1103,7 +1189,8 @@ machine(L1Cache, "Token protocol") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); peek(responseNetwork_in, ResponseMsg) { - sequencer.writeCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + //sequencer.writeCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + sequencer.writeCallback(address, getCacheEntry(address).DataBlk); } getCacheEntry(address).Dirty := true; DEBUG_EXPR(getCacheEntry(address).DataBlk); @@ -1133,8 +1220,6 @@ machine(L1Cache, "Token protocol") { useTimerTable.unset(address); } - - action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") { mandatoryQueue_in.dequeue(); } @@ -1156,14 +1241,19 @@ machine(L1Cache, "Token protocol") { } action(p_informL2AboutTokenLoss, "p", desc="Inform L2 about loss of all tokens") { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:INV; out_msg.Tokens := 0; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.DestMachine := MachineType:L2Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Response_Control; } } @@ -1189,13 +1279,25 @@ machine(L1Cache, "Token protocol") { if (L1_TBEs[address].WentPersistent) { // assert(starving == true); outstandingRequests := outstandingRequests - 1; - enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") { + enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT; out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.broadcast(MachineType:L1Cache); - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Persistent_Control; } @@ -1217,14 +1319,14 @@ machine(L1Cache, "Token protocol") { // profile_token_retry(address, L1_TBEs[address].AccessType, 1); //} - profile_token_retry(address, L1_TBEs[address].AccessType, L1_TBEs[address].IssueCount); + //profile_token_retry(address, L1_TBEs[address].AccessType, L1_TBEs[address].IssueCount); L1_TBEs.deallocate(address); } action(t_sendAckWithCollectedTokens, "t", desc="Send ack with the tokens we've collected thus far.") { if (getCacheEntry(address).Tokens > 0) { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -1259,13 +1361,13 @@ machine(L1Cache, "Token protocol") { action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") { if (L1DcacheMemory.isTagPresent(address) == false) { - L1DcacheMemory.allocate(address); + L1DcacheMemory.allocate(address, new Entry); } } action(pp_allocateL1ICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") { if (L1IcacheMemory.isTagPresent(address) == false) { - L1IcacheMemory.allocate(address); + L1IcacheMemory.allocate(address, new Entry); } } @@ -1281,11 +1383,6 @@ machine(L1Cache, "Token protocol") { } } - - action(z_stall, "z", desc="Stall") { - - } - action(zz_recycleMandatoryQueue, "\z", desc="Send the head of the mandatory queue to the back of the queue.") { mandatoryQueue_in.recycle(); } diff --git a/src/mem/protocol/MOESI_CMP_token-L2cache.sm b/src/mem/protocol/MOESI_CMP_token-L2cache.sm index 0a58ed5cf..9a5c400f2 100644 --- a/src/mem/protocol/MOESI_CMP_token-L2cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L2cache.sm @@ -32,20 +32,33 @@ * */ -machine(L2Cache, "Token protocol") { +machine(L2Cache, "Token protocol") + : int l2_request_latency, + int l2_response_latency, + int N_tokens, + bool filtering_enabled +{ // L2 BANK QUEUES // From local bank of L2 cache TO the network - MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="0", ordered="false"; // this L2 bank -> a local L1 - MessageBuffer GlobalRequestFromL2Cache, network="To", virtual_network="1", ordered="false"; // this L2 bank -> mod-directory - MessageBuffer responseFromL2Cache, network="To", virtual_network="2", ordered="false"; // this L2 bank -> a local L1 || mod-directory + + // this L2 bank -> a local L1 || mod-directory + MessageBuffer responseFromL2Cache, network="To", virtual_network="1", ordered="false"; + // this L2 bank -> mod-directory + MessageBuffer GlobalRequestFromL2Cache, network="To", virtual_network="3", ordered="false"; + // this L2 bank -> a local L1 + MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="4", ordered="false"; // FROM the network to this local bank of L2 cache - MessageBuffer L1RequestToL2Cache, network="From", virtual_network="0", ordered="false"; // a local L1 -> this L2 bank - MessageBuffer GlobalRequestToL2Cache, network="From", virtual_network="1", ordered="false"; // mod-directory -> this L2 bank - MessageBuffer responseToL2Cache, network="From", virtual_network="2", ordered="false"; // a local L1 || mod-directory -> this L2 bank - MessageBuffer persistentToL2Cache, network="From", virtual_network="3", ordered="true"; + + // a local L1 || mod-directory -> this L2 bank + MessageBuffer responseToL2Cache, network="From", virtual_network="1", ordered="false"; + MessageBuffer persistentToL2Cache, network="From", virtual_network="2", ordered="true"; + // mod-directory -> this L2 bank + MessageBuffer GlobalRequestToL2Cache, network="From", virtual_network="3", ordered="false"; + // a local L1 -> this L2 bank + MessageBuffer L1RequestToL2Cache, network="From", virtual_network="4", ordered="false"; // STATES enumeration(State, desc="L2 Cache states", default="L2Cache_State_I") { @@ -107,8 +120,6 @@ machine(L2Cache, "Token protocol") { DataBlock DataBlk, desc="data for the block"; } - - structure(DirEntry, desc="...") { Set Sharers, desc="Set of the internal processors that want the block in shared state"; bool exclusive, default="false", desc="if local exclusive is likely"; @@ -117,7 +128,7 @@ machine(L2Cache, "Token protocol") { external_type(CacheMemory) { bool cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); @@ -132,19 +143,28 @@ machine(L2Cache, "Token protocol") { bool isTagPresent(Address); } + external_type(PersistentTable) { + void persistentRequestLock(Address, MachineID, AccessType); + void persistentRequestUnlock(Address, MachineID); + MachineID findSmallest(Address); + AccessType typeOfSmallest(Address); + void markEntries(Address); + bool isLocked(Address); + int countStarvingForAddress(Address); + int countReadStarvingForAddress(Address); + } - CacheMemory L2cacheMemory, template_hack="<L2Cache_Entry>", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,MachineType_L2Cache,int_to_string(i)+"_L2"'; + CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])'; - PersistentTable persistentTable, constructor_hack="i"; + PersistentTable persistentTable; PerfectCacheMemory localDirectory, template_hack="<L2Cache_DirEntry>"; - - bool getFilteringEnabled(); - Entry getL2CacheEntry(Address addr), return_by_ref="yes" { if (L2cacheMemory.isTagPresent(addr)) { return L2cacheMemory[addr]; } + assert(false); + return L2cacheMemory[addr]; } int getTokens(Address addr) { @@ -465,15 +485,21 @@ machine(L2Cache, "Token protocol") { // if this is a retry or no local sharers, broadcast normally // if (in_msg.RetryNum > 0 || (in_msg.Type == CoherenceRequestType:GETX && exclusiveExists(in_msg.Address) == false) || (in_msg.Type == CoherenceRequestType:GETS && sharersExist(in_msg.Address) == false)) { - enqueue(globalRequestNetwork_out, RequestMsg, latency="L2_REQUEST_LATENCY") { + enqueue(globalRequestNetwork_out, RequestMsg, latency=l2_request_latency) { out_msg.Address := in_msg.Address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; out_msg.RequestorMachine := in_msg.RequestorMachine; - //out_msg.Destination.broadcast(MachineType:L2Cache); out_msg.RetryNum := in_msg.RetryNum; - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); - out_msg.Destination.remove(map_L1CacheMachId_to_L2Cache(address, in_msg.Requestor)); + + // + // If a statically shared L2 cache, then no other L2 caches can + // store the block + // + //out_msg.Destination.broadcast(MachineType:L2Cache); + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + //out_msg.Destination.remove(map_L1CacheMachId_to_L2Cache(address, in_msg.Requestor)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.AccessMode := in_msg.AccessMode; @@ -489,7 +515,7 @@ machine(L2Cache, "Token protocol") { action(bb_bounceResponse, "\b", desc="Bounce tokens and data to memory") { peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -505,7 +531,7 @@ machine(L2Cache, "Token protocol") { action(c_cleanReplacement, "c", desc="Issue clean writeback") { if (getL2CacheEntry(address).Tokens > 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -519,7 +545,7 @@ machine(L2Cache, "Token protocol") { } action(cc_dirtyReplacement, "\c", desc="Issue dirty writeback") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L2Cache; @@ -541,22 +567,22 @@ machine(L2Cache, "Token protocol") { action(d_sendDataWithTokens, "d", desc="Send data and a token from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - if (getL2CacheEntry(address).Tokens > N_tokens()) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + if (getL2CacheEntry(address).Tokens > N_tokens) { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L2Cache; out_msg.Destination.add(in_msg.Requestor); - out_msg.Tokens := N_tokens(); + out_msg.Tokens := N_tokens; out_msg.DataBlk := getL2CacheEntry(address).DataBlk; out_msg.Dirty := false; out_msg.MessageSize := MessageSizeType:Response_Data; } - getL2CacheEntry(address).Tokens := getL2CacheEntry(address).Tokens - N_tokens(); + getL2CacheEntry(address).Tokens := getL2CacheEntry(address).Tokens - N_tokens; } else { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -574,7 +600,7 @@ machine(L2Cache, "Token protocol") { action(dd_sendDataWithAllTokens, "\d", desc="Send data and all tokens from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -592,7 +618,7 @@ machine(L2Cache, "Token protocol") { action(e_sendAckWithCollectedTokens, "e", desc="Send ack with the tokens we've collected thus far.") { if (getL2CacheEntry(address).Tokens > 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -607,7 +633,7 @@ machine(L2Cache, "Token protocol") { } action(ee_sendDataWithAllTokens, "\e", desc="Send data and all tokens from cache to starver") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -626,7 +652,7 @@ machine(L2Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getL2CacheEntry(address).Tokens > 0); if (getL2CacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -644,7 +670,7 @@ machine(L2Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getL2CacheEntry(address).Tokens > 0); if (getL2CacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -666,7 +692,7 @@ machine(L2Cache, "Token protocol") { // assert(persistentTable.isLocked(address)); peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -684,7 +710,7 @@ machine(L2Cache, "Token protocol") { //assert(persistentTable.isLocked(address)); peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; if (in_msg.Type == CoherenceResponseType:WB_SHARED_DATA) { out_msg.Type := CoherenceResponseType:DATA_SHARED; @@ -706,7 +732,7 @@ machine(L2Cache, "Token protocol") { // assert(persistentTable.isLocked(address)); peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -729,24 +755,31 @@ machine(L2Cache, "Token protocol") { action(j_forwardTransientRequestToLocalSharers, "j", desc="Forward external transient request to local sharers") { peek(requestNetwork_in, RequestMsg) { - if (getFilteringEnabled() == true && in_msg.RetryNum == 0 && sharersExist(in_msg.Address) == false) { - profile_filter_action(1); + if (filtering_enabled == true && in_msg.RetryNum == 0 && sharersExist(in_msg.Address) == false) { + //profile_filter_action(1); DEBUG_EXPR("filtered message"); DEBUG_EXPR(in_msg.RetryNum); } else { - enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) { + enqueue(localRequestNetwork_out, RequestMsg, latency=l2_response_latency ) { out_msg.Address := in_msg.Address; out_msg.Requestor := in_msg.Requestor; out_msg.RequestorMachine := in_msg.RequestorMachine; - out_msg.Destination := getLocalL1IDs(machineID); + + // + // Currently assuming only one chip so all L1s are local + // + //out_msg.Destination := getLocalL1IDs(machineID); + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.remove(in_msg.Requestor); + out_msg.Type := in_msg.Type; out_msg.isLocal := false; out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.AccessMode := in_msg.AccessMode; out_msg.Prefetch := in_msg.Prefetch; } - profile_filter_action(0); + //profile_filter_action(0); } } } @@ -756,7 +789,7 @@ machine(L2Cache, "Token protocol") { peek(L1requestNetwork_in, RequestMsg) { assert(getL2CacheEntry(address).Tokens > 0); //enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_to_L1_RESPONSE_LATENCY") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -774,7 +807,7 @@ machine(L2Cache, "Token protocol") { action(k_dataOwnerFromL2CacheToL1Requestor, "\k", desc="Send data and a token from cache to L1 requestor") { peek(L1requestNetwork_in, RequestMsg) { assert(getL2CacheEntry(address).Tokens > 0); - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -793,7 +826,7 @@ machine(L2Cache, "Token protocol") { peek(L1requestNetwork_in, RequestMsg) { // assert(getL2CacheEntry(address).Tokens == max_tokens()); //enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_to_L1_RESPONSE_LATENCY") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -840,12 +873,13 @@ machine(L2Cache, "Token protocol") { } action(r_markNewSharer, "r", desc="Mark the new local sharer from local request message") { - peek(L1requestNetwork_in, RequestMsg) { - if (in_msg.Type == CoherenceRequestType:GETX) { - setNewWriter(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); - } else if (in_msg.Type == CoherenceRequestType:GETS) { - addNewSharer(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); + if (machineIDToMachineType(in_msg.Requestor) == MachineType:L1Cache) { + if (in_msg.Type == CoherenceRequestType:GETX) { + setNewWriter(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); + } else if (in_msg.Type == CoherenceRequestType:GETS) { + addNewSharer(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); + } } } } @@ -854,16 +888,19 @@ machine(L2Cache, "Token protocol") { clearExclusiveBitIfExists(address); } - action( r_setMRU, "\rr", desc="manually set the MRU bit for cache line" ) { - if(isCacheTagPresent(address)) { - L2cacheMemory.setMRU(address); + action(r_setMRU, "\rr", desc="manually set the MRU bit for cache line" ) { + peek(L1requestNetwork_in, RequestMsg) { + if ((machineIDToMachineType(in_msg.Requestor) == MachineType:L1Cache) && + (isCacheTagPresent(address))) { + L2cacheMemory.setMRU(address); + } } } action(t_sendAckWithCollectedTokens, "t", desc="Send ack with the tokens we've collected thus far.") { if (getL2CacheEntry(address).Tokens > 0) { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -881,7 +918,7 @@ machine(L2Cache, "Token protocol") { action(tt_sendLocalAckWithCollectedTokens, "tt", desc="Send ack with the tokens we've collected thus far.") { if (getL2CacheEntry(address).Tokens > 0) { peek(L1requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -906,19 +943,19 @@ machine(L2Cache, "Token protocol") { } action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") { - L2cacheMemory.allocate(address); + L2cacheMemory.allocate(address, new Entry); } action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") { L2cacheMemory.deallocate(address); } - action(uu_profileMiss, "\u", desc="Profile the demand miss") { - peek(L1requestNetwork_in, RequestMsg) { + //action(uu_profileMiss, "\u", desc="Profile the demand miss") { + // peek(L1requestNetwork_in, RequestMsg) { // AccessModeType not implemented //profile_L2Cache_miss(convertToGenericType(in_msg.Type), in_msg.AccessMode, MessageSizeTypeToInt(in_msg.MessageSize), in_msg.Prefetch, machineIDToNodeID(in_msg.Requestor)); - } - } + // } + //} action(w_assertIncomingDataAndCacheDataMatch, "w", desc="Assert that the incoming data and the data in the cache match") { @@ -927,11 +964,6 @@ machine(L2Cache, "Token protocol") { } } - action(z_stall, "z", desc="Stall") { - } - - - //***************************************************** // TRANSITIONS @@ -961,7 +993,7 @@ machine(L2Cache, "Token protocol") { transition(NP, {L1_GETS, L1_GETX}) { a_broadcastLocalRequest; r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1012,7 +1044,7 @@ machine(L2Cache, "Token protocol") { a_broadcastLocalRequest; tt_sendLocalAckWithCollectedTokens; // send any tokens we have collected r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1020,7 +1052,7 @@ machine(L2Cache, "Token protocol") { a_broadcastLocalRequest; tt_sendLocalAckWithCollectedTokens; // send any tokens we have collected r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1181,7 +1213,7 @@ machine(L2Cache, "Token protocol") { tt_sendLocalAckWithCollectedTokens; r_markNewSharer; r_setMRU; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1294,7 +1326,7 @@ machine(L2Cache, "Token protocol") { k_dataAndAllTokensFromL2CacheToL1Requestor; r_markNewSharer; r_setMRU; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1382,7 +1414,7 @@ machine(L2Cache, "Token protocol") { transition(I_L, {L1_GETX, L1_GETS}) { a_broadcastLocalRequest; r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1391,7 +1423,7 @@ machine(L2Cache, "Token protocol") { tt_sendLocalAckWithCollectedTokens; r_markNewSharer; r_setMRU; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } diff --git a/src/mem/protocol/MOESI_CMP_token-dir.sm b/src/mem/protocol/MOESI_CMP_token-dir.sm index 1592fd123..7925a8fe0 100644 --- a/src/mem/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/protocol/MOESI_CMP_token-dir.sm @@ -32,14 +32,23 @@ */ -machine(Directory, "Token protocol") { - - MessageBuffer requestFromDir, network="To", virtual_network="1", ordered="false"; - MessageBuffer responseFromDir, network="To", virtual_network="2", ordered="false"; - - MessageBuffer persistentToDir, network="From", virtual_network="3", ordered="true"; - MessageBuffer requestToDir, network="From", virtual_network="1", ordered="false"; - MessageBuffer responseToDir, network="From", virtual_network="2", ordered="false"; +machine(Directory, "Token protocol") + : int directory_latency, + int l2_select_low_bit, + int l2_select_num_bits, + bool distributed_persistent, + int fixed_timeout_latency +{ + + MessageBuffer dmaResponseFromDir, network="To", virtual_network="0", ordered="true"; + MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; + MessageBuffer persistentFromDir, network="To", virtual_network="2", ordered="true"; + MessageBuffer requestFromDir, network="To", virtual_network="4", ordered="false"; + + MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false"; + MessageBuffer persistentToDir, network="From", virtual_network="2", ordered="true"; + MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; + MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; // STATES enumeration(State, desc="Directory states", default="Directory_State_O") { @@ -47,6 +56,24 @@ machine(Directory, "Token protocol") { O, desc="Owner"; NO, desc="Not Owner"; L, desc="Locked"; + + // Memory wait states - can block all messages including persistent requests + O_W, desc="transitioning to Owner, waiting for memory write"; + L_W, desc="transitioning to Locked, waiting for memory read"; + DR_L_W, desc="transitioning to Locked underneath a DMA read, waiting for memory data"; + NO_W, desc="transitioning to Not Owner, waiting for memory read"; + O_DW_W, desc="transitioning to Owner, waiting for memory before DMA ack"; + O_DR_W, desc="transitioning to Owner, waiting for memory before DMA data"; + + // DMA request transient states - must respond to persistent requests + O_DW, desc="issued GETX for DMA write, waiting for all tokens"; + NO_DW, desc="issued GETX for DMA write, waiting for all tokens"; + NO_DR, desc="issued GETS for DMA read, waiting for data"; + + // DMA request in progress - competing with a CPU persistent request + DW_L, desc="issued GETX for DMA write, CPU persistent request must complete first"; + DR_L, desc="issued GETS for DMA read, CPU persistent request must complete first"; + } // Events @@ -55,9 +82,23 @@ machine(Directory, "Token protocol") { GETS, desc="A GETS arrives"; Lockdown, desc="A lockdown request arrives"; Unlockdown, desc="An un-lockdown request arrives"; + Own_Lock_or_Unlock, desc="own lock or unlock"; Data_Owner, desc="Data arrive"; + Data_All_Tokens, desc="Data and all tokens"; Ack_Owner, desc="Owner token arrived without data because it was clean"; + Ack_Owner_All_Tokens, desc="All tokens including owner arrived without data because it was clean"; Tokens, desc="Tokens arrive"; + Ack_All_Tokens, desc="All_Tokens arrive"; + Request_Timeout, desc="A DMA request has timed out"; + + // Memory Controller + Memory_Data, desc="Fetched data from memory arrives"; + Memory_Ack, desc="Writeback Ack from memory arrives"; + + // DMA requests + DMA_READ, desc="A DMA Read memory request"; + DMA_WRITE, desc="A DMA Write memory request"; + DMA_WRITE_All_Tokens, desc="A DMA Write memory request, directory has all tokens"; } // TYPES @@ -73,7 +114,7 @@ machine(Directory, "Token protocol") { // is 'soft state' that does not need to be correct (as long as // you're eventually willing to resort to broadcast.) - Set Owner, desc="Probable Owner of the line. More accurately, the set of processors who need to see a GetS or GetO. We use a Set for convenience, but only one bit is set at a time."; + Set Owner, desc="Probable Owner of the line. More accurately, the set of processors who need to see a GetS or GetO. We use a Set for convenience, but only one bit is set at a time."; Set Sharers, desc="Probable sharers of the line. More accurately, the set of processors who need to see a GetX"; } @@ -82,23 +123,70 @@ machine(Directory, "Token protocol") { bool isPresent(Address); } + external_type(MemoryControl, inport="yes", outport="yes") { + + } + + external_type(PersistentTable) { + void persistentRequestLock(Address, MachineID, AccessType); + void persistentRequestUnlock(Address, MachineID); + bool okToIssueStarving(Address, MachineID); + MachineID findSmallest(Address); + AccessType typeOfSmallest(Address); + void markEntries(Address); + bool isLocked(Address); + int countStarvingForAddress(Address); + int countReadStarvingForAddress(Address); + } + + // TBE entries for DMA requests + structure(TBE, desc="TBE entries for outstanding DMA requests") { + Address PhysicalAddress, desc="physical address"; + State TBEState, desc="Transient State"; + DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory"; + DataBlock DataBlk, desc="The current view of system memory"; + int Len, desc="..."; + MachineID DmaRequestor, desc="DMA requestor"; + bool WentPersistent, desc="Did the DMA request require a persistent request"; + } + + external_type(TBETable) { + TBE lookup(Address); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } // ** OBJECTS ** - DirectoryMemory directory, constructor_hack="i"; + DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])'; + + MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])'; - PersistentTable persistentTable, constructor_hack="i"; + PersistentTable persistentTable; + TimerTable reissueTimerTable; + + TBETable TBEs, template_hack="<Directory_TBE>"; + + bool starving, default="false"; State getState(Address addr) { - return directory[addr].DirectoryState; + if (TBEs.isPresent(addr)) { + return TBEs[addr].TBEState; + } else { + return directory[addr].DirectoryState; + } } void setState(Address addr, State state) { + if (TBEs.isPresent(addr)) { + TBEs[addr].TBEState := state; + } directory[addr].DirectoryState := state; if (state == State:L) { assert(directory[addr].Tokens == 0); - } + } // We have one or zero owners assert((directory[addr].Owner.count() == 0) || (directory[addr].Owner.count() == 1)); @@ -112,19 +200,90 @@ machine(Directory, "Token protocol") { // assert(directory[addr].Tokens >= (max_tokens() / 2)); // Only mostly true; this might not always hold } } + + bool okToIssueStarving(Address addr, MachineID machinID) { + return persistentTable.okToIssueStarving(addr, machineID); + } + + void markPersistentEntries(Address addr) { + persistentTable.markEntries(addr); + } // ** OUT_PORTS ** out_port(responseNetwork_out, ResponseMsg, responseFromDir); + out_port(persistentNetwork_out, PersistentMsg, persistentFromDir); out_port(requestNetwork_out, RequestMsg, requestFromDir); + out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaResponseFromDir); + + // + // Memory buffer for memory controller to DIMM communication + // + out_port(memQueue_out, MemoryMsg, memBuffer); // ** IN_PORTS ** + + // off-chip memory request/response is done + in_port(memQueue_in, MemoryMsg, memBuffer) { + if (memQueue_in.isReady()) { + peek(memQueue_in, MemoryMsg) { + if (in_msg.Type == MemoryRequestType:MEMORY_READ) { + trigger(Event:Memory_Data, in_msg.Address); + } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) { + trigger(Event:Memory_Ack, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } + } + } + + // Reissue Timer + in_port(reissueTimerTable_in, Address, reissueTimerTable) { + if (reissueTimerTable_in.isReady()) { + trigger(Event:Request_Timeout, reissueTimerTable.readyAddress()); + } + } + + in_port(responseNetwork_in, ResponseMsg, responseToDir) { + if (responseNetwork_in.isReady()) { + peek(responseNetwork_in, ResponseMsg) { + assert(in_msg.Destination.isElement(machineID)); + if (directory[in_msg.Address].Tokens + in_msg.Tokens == max_tokens()) { + if ((in_msg.Type == CoherenceResponseType:DATA_OWNER) || + (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { + trigger(Event:Data_All_Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { + trigger(Event:Ack_Owner_All_Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:Ack_All_Tokens, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } else { + if (in_msg.Type == CoherenceResponseType:DATA_OWNER) { + trigger(Event:Data_Owner, in_msg.Address); + } else if ((in_msg.Type == CoherenceResponseType:ACK) || + (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { + trigger(Event:Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { + trigger(Event:Ack_Owner, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } + } + } + } in_port(persistentNetwork_in, PersistentMsg, persistentToDir) { if (persistentNetwork_in.isReady()) { peek(persistentNetwork_in, PersistentMsg) { assert(in_msg.Destination.isElement(machineID)); - if (distributedPersistentEnabled()) { + if (distributed_persistent) { // Apply the lockdown or unlockdown message to the table if (in_msg.Type == PersistentRequestType:GETX_PERSISTENT) { persistentTable.persistentRequestLock(in_msg.Address, in_msg.Requestor, AccessType:Write); @@ -173,19 +332,18 @@ machine(Directory, "Token protocol") { } } - in_port(responseNetwork_in, ResponseMsg, responseToDir) { - if (responseNetwork_in.isReady()) { - peek(responseNetwork_in, ResponseMsg) { - assert(in_msg.Destination.isElement(machineID)); - if (in_msg.Type == CoherenceResponseType:DATA_OWNER) { - trigger(Event:Data_Owner, in_msg.Address); - } else if ((in_msg.Type == CoherenceResponseType:ACK) || - (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { - trigger(Event:Tokens, in_msg.Address); - } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { - trigger(Event:Ack_Owner, in_msg.Address); + in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, DMARequestMsg) { + if (in_msg.Type == DMARequestType:READ) { + trigger(Event:DMA_READ, in_msg.LineAddress); + } else if (in_msg.Type == DMARequestType:WRITE) { + if (directory[in_msg.LineAddress].Tokens == max_tokens()) { + trigger(Event:DMA_WRITE_All_Tokens, in_msg.LineAddress); + } else { + trigger(Event:DMA_WRITE, in_msg.LineAddress); + } } else { - DEBUG_EXPR(in_msg.Type); error("Invalid message"); } } @@ -199,7 +357,7 @@ machine(Directory, "Token protocol") { if (directory[address].Tokens > 0) { peek(requestNetwork_in, RequestMsg) { // enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_CACHE_LATENCY") {// FIXME? - enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_LATENCY") {// FIXME? + enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {// FIXME? out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -213,11 +371,151 @@ machine(Directory, "Token protocol") { } } + action(px_tryIssuingPersistentGETXRequest, "px", desc="...") { + if (okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := PersistentRequestType:GETX_PERSISTENT; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + out_msg.Destination.broadcast(MachineType:L1Cache); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + markPersistentEntries(address); + starving := true; + + TBEs[address].WentPersistent := true; + + // Do not schedule a wakeup, a persistent requests will always complete + } else { + + // We'd like to issue a persistent request, but are not allowed + // to issue a P.R. right now. This, we do not increment the + // IssueCount. + + // Set a wakeup timer + reissueTimerTable.set(address, 10); + } + } + + action(bw_broadcastWrite, "bw", desc="Broadcast GETX if we need tokens") { + peek(dmaRequestQueue_in, DMARequestMsg) { + // + // Assser that we only send message if we don't already have all the tokens + // + assert(directory[address].Tokens != max_tokens()); + enqueue(requestNetwork_out, RequestMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETX; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + + // + // Since only one chip, assuming all L1 caches are local + // + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.RetryNum := 0; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + } + } + + action(ps_tryIssuingPersistentGETSRequest, "ps", desc="...") { + if (okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := PersistentRequestType:GETS_PERSISTENT; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + out_msg.Destination.broadcast(MachineType:L1Cache); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + markPersistentEntries(address); + starving := true; + + TBEs[address].WentPersistent := true; + + // Do not schedule a wakeup, a persistent requests will always complete + } else { + + // We'd like to issue a persistent request, but are not allowed + // to issue a P.R. right now. This, we do not increment the + // IssueCount. + + // Set a wakeup timer + reissueTimerTable.set(address, 10); + } + } + + action(br_broadcastRead, "br", desc="Broadcast GETS for data") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(requestNetwork_out, RequestMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETS; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + + // + // Since only one chip, assuming all L1 caches are local + // + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.RetryNum := 0; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + } + } + action(aa_sendTokensToStarver, "\a", desc="Send tokens to starver") { // Only send a message if we have tokens to send if (directory[address].Tokens > 0) { // enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_CACHE_LATENCY") {// FIXME? - enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_LATENCY") {// FIXME? + enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {// FIXME? out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -230,14 +528,14 @@ machine(Directory, "Token protocol") { } } - action(d_sendDataWithAllTokens, "d", desc="Send data and tokens to requestor") { - peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { + action(d_sendMemoryDataWithAllTokens, "d", desc="Send data and tokens to requestor") { + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:Directory; - out_msg.Destination.add(in_msg.Requestor); + out_msg.Destination.add(in_msg.OriginalRequestorMachId); assert(directory[address].Tokens > 0); out_msg.Tokens := directory[in_msg.Address].Tokens; out_msg.DataBlk := directory[in_msg.Address].DataBlk; @@ -249,21 +547,140 @@ machine(Directory, "Token protocol") { } action(dd_sendDataWithAllTokensToStarver, "\d", desc="Send data and tokens to starver") { - enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { - out_msg.Address := address; - out_msg.Type := CoherenceResponseType:DATA_OWNER; - out_msg.Sender := machineID; - out_msg.SenderMachine := MachineType:Directory; - out_msg.Destination.add(persistentTable.findSmallest(address)); - assert(directory[address].Tokens > 0); - out_msg.Tokens := directory[address].Tokens; - out_msg.DataBlk := directory[address].DataBlk; - out_msg.Dirty := false; - out_msg.MessageSize := MessageSizeType:Response_Data; + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA_OWNER; + out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:Directory; + out_msg.Destination.add(persistentTable.findSmallest(address)); + assert(directory[address].Tokens > 0); + out_msg.Tokens := directory[address].Tokens; + out_msg.DataBlk := directory[address].DataBlk; + out_msg.Dirty := false; + out_msg.MessageSize := MessageSizeType:Response_Data; + } } directory[address].Tokens := 0; } + action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { + peek(requestNetwork_in, RequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } + + action(fd_memoryDma, "fd", desc="Queue off-chip fetch request") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } + + action(lq_queueMemoryWbRequest, "lq", desc="Write data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + DEBUG_EXPR(out_msg); + } + } + + action(ld_queueMemoryDmaWriteFromTbe, "ld", desc="Write DMA data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + // first, initialize the data blk to the current version of system memory + out_msg.DataBlk := TBEs[address].DataBlk; + // then add the dma write data + out_msg.DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + DEBUG_EXPR(out_msg); + } + } + + action(lr_queueMemoryDmaReadWriteback, "lr", desc="Write DMA data from read to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + // first, initialize the data blk to the current version of system memory + out_msg.DataBlk := TBEs[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + + action(vd_allocateDmaRequestInTBE, "vd", desc="Record Data in TBE") { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + TBEs[address].DmaDataBlk := in_msg.DataBlk; + TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; + TBEs[address].Len := in_msg.Len; + TBEs[address].DmaRequestor := in_msg.Requestor; + TBEs[address].WentPersistent := false; + } + } + + action(s_deallocateTBE, "s", desc="Deallocate TBE") { + + if (TBEs[address].WentPersistent) { + assert(starving == true); + + enqueue(persistentNetwork_out, PersistentMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + out_msg.Destination.broadcast(MachineType:L1Cache); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; + } + starving := false; + } + + TBEs.deallocate(address); + } + + action(rd_recordDataInTbe, "rd", desc="Record data in TBE") { + peek(responseNetwork_in, ResponseMsg) { + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + + action(cd_writeCleanDataToTbe, "cd", desc="Write clean memory data to TBE") { + TBEs[address].DataBlk := directory[address].DataBlk; + } + + action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") { + directory[address].DataBlk := TBEs[address].DataBlk; + directory[address].DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + } + action(f_incrementTokens, "f", desc="Increment the number of tokens we're tracking") { peek(responseNetwork_in, ResponseMsg) { assert(in_msg.Tokens >= 1); @@ -275,14 +692,34 @@ machine(Directory, "Token protocol") { requestNetwork_in.dequeue(); } + action(z_recycleRequest, "z", desc="Recycle the request queue") { + requestNetwork_in.recycle(); + } + action(k_popIncomingResponseQueue, "k", desc="Pop incoming response queue") { responseNetwork_in.dequeue(); } + action(kz_recycleResponse, "kz", desc="Recycle incoming response queue") { + responseNetwork_in.recycle(); + } + action(l_popIncomingPersistentQueue, "l", desc="Pop incoming persistent queue") { persistentNetwork_in.dequeue(); } + action(p_popDmaRequestQueue, "pd", desc="pop dma request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(y_recycleDmaRequestQueue, "y", desc="recycle dma request queue") { + dmaRequestQueue_in.recycle(); + } + + action(l_popMemQueue, "q", desc="Pop off-chip request queue") { + memQueue_in.dequeue(); + } + action(m_writeDataToMemory, "m", desc="Write dirty writeback to memory") { peek(responseNetwork_in, ResponseMsg) { directory[in_msg.Address].DataBlk := in_msg.DataBlk; @@ -291,18 +728,15 @@ machine(Directory, "Token protocol") { } } - action(n_checkIncomingMsg, "n", desc="Check incoming token message") { + action(n_checkData, "n", desc="Check incoming clean data message") { peek(responseNetwork_in, ResponseMsg) { - assert(in_msg.Type == CoherenceResponseType:ACK_OWNER); - assert(in_msg.Dirty == false); - assert(in_msg.MessageSize == MessageSizeType:Writeback_Control); assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk); } } action(r_bounceResponse, "r", desc="Bounce response to starving processor") { peek(responseNetwork_in, ResponseMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -316,7 +750,20 @@ machine(Directory, "Token protocol") { } } - action(s_bounceDatalessOwnerToken, "s", desc="Bounce clean owner token to starving processor") { + action(st_scheduleTimeout, "st", desc="Schedule Timeout") { + // + // currently only support a fixed timeout latency + // + reissueTimerTable.set(address, fixed_timeout_latency); + } + + action(ut_unsetReissueTimer, "ut", desc="Unset reissue timer.") { + if (reissueTimerTable.isSet(address)) { + reissueTimerTable.unset(address); + } + } + + action(bd_bounceDatalessOwnerToken, "bd", desc="Bounce clean owner token to starving processor") { peek(responseNetwork_in, ResponseMsg) { assert(in_msg.Type == CoherenceResponseType:ACK_OWNER); assert(in_msg.Dirty == false); @@ -331,7 +778,7 @@ machine(Directory, "Token protocol") { // Bounce the message, but "re-associate" the data and the owner // token. In essence we're converting an ACK_OWNER message to a // DATA_OWNER message, keeping the number of tokens the same. - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -346,53 +793,212 @@ machine(Directory, "Token protocol") { } } + action(da_sendDmaAck, "da", desc="Send Ack to DMA controller") { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:ACK; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + + action(dm_sendMemoryDataToDma, "dm", desc="Send Data to DMA controller from memory") { + peek(memQueue_in, MemoryMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(dd_sendDmaData, "dd", desc="Send Data to DMA controller") { + peek(responseNetwork_in, ResponseMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } // TRANSITIONS - // Trans. from O - transition(O, GETX, NO) { - d_sendDataWithAllTokens; + // + // Trans. from base state O + // the directory has valid data + // + transition(O, GETX, NO_W) { + qf_queueMemoryFetchRequest; j_popIncomingRequestQueue; } - transition(O, GETS, NO) { - d_sendDataWithAllTokens; + transition(O, DMA_WRITE, O_DW) { + vd_allocateDmaRequestInTBE; + bw_broadcastWrite; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + + transition(O, DMA_WRITE_All_Tokens, O_DW_W) { + vd_allocateDmaRequestInTBE; + cd_writeCleanDataToTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + p_popDmaRequestQueue; + } + + transition(O, GETS, NO_W) { + qf_queueMemoryFetchRequest; j_popIncomingRequestQueue; } - transition(O, Lockdown, L) { - dd_sendDataWithAllTokensToStarver; + transition(O, DMA_READ, O_DR_W) { + vd_allocateDmaRequestInTBE; + fd_memoryDma; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + + transition(O, Lockdown, L_W) { + qf_queueMemoryFetchRequest; + l_popIncomingPersistentQueue; + } + + transition(O, {Tokens, Ack_All_Tokens}) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + transition(O, {Data_Owner, Data_All_Tokens}) { + n_checkData; + f_incrementTokens; + k_popIncomingResponseQueue; + } + + // + // transitioning to Owner, waiting for memory before DMA ack + // All other events should recycle/stall + // + transition(O_DR_W, Memory_Data, O) { + dm_sendMemoryDataToDma; + ut_unsetReissueTimer; + s_deallocateTBE; + l_popMemQueue; + } + + // + // issued GETX for DMA write, waiting for all tokens + // + transition(O_DW, Tokens) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + transition(O_DW, Data_Owner) { + f_incrementTokens; + rd_recordDataInTbe; + k_popIncomingResponseQueue; + } + + transition(O_DW, Ack_Owner) { + f_incrementTokens; + cd_writeCleanDataToTbe; + k_popIncomingResponseQueue; + } + + transition(O_DW, Lockdown, DW_L) { l_popIncomingPersistentQueue; } - transition(O, Tokens) { + transition({NO_DW, O_DW}, Data_All_Tokens, O_DW_W) { f_incrementTokens; + rd_recordDataInTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; k_popIncomingResponseQueue; } + transition(O_DW, Ack_All_Tokens, O_DW_W) { + f_incrementTokens; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; + k_popIncomingResponseQueue; + } + + transition(O_DW, Ack_Owner_All_Tokens, O_DW_W) { + f_incrementTokens; + cd_writeCleanDataToTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; + k_popIncomingResponseQueue; + } + + transition(O_DW_W, Memory_Ack, O) { + da_sendDmaAck; + s_deallocateTBE; + l_popMemQueue; + } + + // // Trans. from NO + // The direcotry does not have valid data, but may have some tokens + // transition(NO, GETX) { a_sendTokens; j_popIncomingRequestQueue; } + transition(NO, DMA_WRITE, NO_DW) { + vd_allocateDmaRequestInTBE; + bw_broadcastWrite; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + transition(NO, GETS) { j_popIncomingRequestQueue; } + transition(NO, DMA_READ, NO_DR) { + vd_allocateDmaRequestInTBE; + br_broadcastRead; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + transition(NO, Lockdown, L) { aa_sendTokensToStarver; l_popIncomingPersistentQueue; } - transition(NO, Data_Owner, O) { + transition(NO, {Data_Owner, Data_All_Tokens}, O_W) { m_writeDataToMemory; f_incrementTokens; + lq_queueMemoryWbRequest; k_popIncomingResponseQueue; } - transition(NO, Ack_Owner, O) { - n_checkIncomingMsg; + transition(NO, {Ack_Owner, Ack_Owner_All_Tokens}, O) { + n_checkData; f_incrementTokens; k_popIncomingResponseQueue; } @@ -402,34 +1008,156 @@ machine(Directory, "Token protocol") { k_popIncomingResponseQueue; } + transition(NO_W, Memory_Data, NO) { + d_sendMemoryDataWithAllTokens; + l_popMemQueue; + } + + // Trans. from NO_DW + transition(NO_DW, Request_Timeout) { + ut_unsetReissueTimer; + px_tryIssuingPersistentGETXRequest; + } + + transition(NO_DW, Lockdown, DW_L) { + aa_sendTokensToStarver; + l_popIncomingPersistentQueue; + } + + // Note: NO_DW, Data_All_Tokens transition is combined with O_DW + // Note: NO_DW should not receive the action Ack_All_Tokens because the + // directory does not have valid data + + transition(NO_DW, Data_Owner, O_DW) { + f_incrementTokens; + rd_recordDataInTbe; + lq_queueMemoryWbRequest; + k_popIncomingResponseQueue; + } + + transition({NO_DW, NO_DR}, Tokens) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + // Trans. from NO_DR + transition(NO_DR, Request_Timeout) { + ut_unsetReissueTimer; + ps_tryIssuingPersistentGETSRequest; + } + + transition(NO_DR, Lockdown, DR_L) { + aa_sendTokensToStarver; + l_popIncomingPersistentQueue; + } + + transition(NO_DR, {Data_Owner, Data_All_Tokens}, O_W) { + m_writeDataToMemory; + f_incrementTokens; + dd_sendDmaData; + lr_queueMemoryDmaReadWriteback; + ut_unsetReissueTimer; + s_deallocateTBE; + k_popIncomingResponseQueue; + } + // Trans. from L - transition(L, {GETX, GETS}) { + transition({L, DW_L, DR_L}, {GETX, GETS}) { j_popIncomingRequestQueue; } - transition(L, Lockdown) { + transition({L, DW_L, DR_L, L_W, DR_L_W}, Lockdown) { l_popIncomingPersistentQueue; } - // we could change this to write the data to memory and send it cleanly - transition(L, Data_Owner) { + // + // Received data for lockdown blocks + // For blocks with outstanding dma requests to them + // ...we could change this to write the data to memory and send it cleanly + // ...we could also proactively complete our DMA requests + // However, to keep my mind from spinning out-of-control, we won't for now :) + // + transition({DW_L, DR_L, L}, {Data_Owner, Data_All_Tokens}) { r_bounceResponse; k_popIncomingResponseQueue; } - transition(L, Tokens) { + transition({DW_L, DR_L, L}, Tokens) { r_bounceResponse; k_popIncomingResponseQueue; } - transition(L, Ack_Owner) { - s_bounceDatalessOwnerToken; + transition({DW_L, DR_L, L}, {Ack_Owner_All_Tokens, Ack_Owner}) { + bd_bounceDatalessOwnerToken; k_popIncomingResponseQueue; } - transition(L, Unlockdown, NO) { l_popIncomingPersistentQueue; } + transition(L_W, Memory_Data, L) { + dd_sendDataWithAllTokensToStarver; + l_popMemQueue; + } + + transition(DR_L_W, Memory_Data, DR_L) { + dd_sendDataWithAllTokensToStarver; + l_popMemQueue; + } + + transition(DW_L, {Unlockdown, Own_Lock_or_Unlock}, NO_DW) { + l_popIncomingPersistentQueue; + } + + transition(DR_L_W, {Unlockdown, Own_Lock_or_Unlock}, O_DR_W) { + l_popIncomingPersistentQueue; + } + + transition({DW_L, DR_L_W}, Request_Timeout) { + ut_unsetReissueTimer; + px_tryIssuingPersistentGETXRequest; + } + + transition(DR_L, {Unlockdown, Own_Lock_or_Unlock}, NO_DR) { + l_popIncomingPersistentQueue; + } + + transition(DR_L, Request_Timeout) { + ut_unsetReissueTimer; + ps_tryIssuingPersistentGETSRequest; + } + + transition(O_W, Memory_Ack, O) { + l_popMemQueue; + } + + transition({O, NO, L, O_DW, NO_DW, NO_DR}, Own_Lock_or_Unlock) { + l_popIncomingPersistentQueue; + } + + // Blocked states + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W, O_DW, NO_DW, NO_DR}, {GETX, GETS}) { + z_recycleRequest; + } + + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W, O_DW, NO_DW, NO_DR, L, DW_L, DR_L}, {DMA_READ, DMA_WRITE}) { + y_recycleDmaRequestQueue; + } + + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W}, {Data_Owner, Ack_Owner, Tokens}) { + kz_recycleResponse; + } + + transition({NO_W, O_W}, Lockdown, L_W) { + l_popIncomingPersistentQueue; + } + + transition(O_DR_W, Lockdown, DR_L_W) { + l_popIncomingPersistentQueue; + } + + transition({NO_W, O_W, O_DR_W}, {Unlockdown, Own_Lock_or_Unlock}) { + l_popIncomingPersistentQueue; + } } diff --git a/src/mem/protocol/MOESI_CMP_token-dma.sm b/src/mem/protocol/MOESI_CMP_token-dma.sm new file mode 100644 index 000000000..550a36ae0 --- /dev/null +++ b/src/mem/protocol/MOESI_CMP_token-dma.sm @@ -0,0 +1,165 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +machine(DMA, "DMA Controller") +: int request_latency +{ + + MessageBuffer responseFromDir, network="From", virtual_network="0", ordered="true", no_vector="true"; + MessageBuffer reqToDirectory, network="To", virtual_network="5", ordered="false", no_vector="true"; + + enumeration(State, desc="DMA states", default="DMA_State_READY") { + READY, desc="Ready to accept a new request"; + BUSY_RD, desc="Busy: currently processing a request"; + BUSY_WR, desc="Busy: currently processing a request"; + } + + enumeration(Event, desc="DMA events") { + ReadRequest, desc="A new read request"; + WriteRequest, desc="A new write request"; + Data, desc="Data from a DMA memory read"; + Ack, desc="DMA write to memory completed"; + } + + external_type(DMASequencer) { + void ackCallback(); + void dataCallback(DataBlock); + } + + MessageBuffer mandatoryQueue, ordered="false", no_vector="true"; + DMASequencer dma_sequencer, factory='RubySystem::getDMASequencer(m_cfg["dma_sequencer"])', no_vector="true"; + State cur_state, no_vector="true"; + + State getState(Address addr) { + return cur_state; + } + void setState(Address addr, State state) { + cur_state := state; + } + + out_port(reqToDirectory_out, DMARequestMsg, reqToDirectory, desc="..."); + + in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, SequencerMsg) { + if (in_msg.Type == SequencerRequestType:LD ) { + trigger(Event:ReadRequest, in_msg.LineAddress); + } else if (in_msg.Type == SequencerRequestType:ST) { + trigger(Event:WriteRequest, in_msg.LineAddress); + } else { + error("Invalid request type"); + } + } + } + } + + in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") { + if (dmaResponseQueue_in.isReady()) { + peek( dmaResponseQueue_in, DMAResponseMsg) { + if (in_msg.Type == DMAResponseType:ACK) { + trigger(Event:Ack, in_msg.LineAddress); + } else if (in_msg.Type == DMAResponseType:DATA) { + trigger(Event:Data, in_msg.LineAddress); + } else { + error("Invalid response type"); + } + } + } + } + + action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:READ; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:WRITE; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.ackCallback(); + } + } + + action(d_dataCallback, "d", desc="Write data to dma sequencer") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.dataCallback(in_msg.DataBlk); + } + } + + action(p_popRequestQueue, "p", desc="Pop request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(p_popResponseQueue, "\p", desc="Pop request queue") { + dmaResponseQueue_in.dequeue(); + } + + transition(READY, ReadRequest, BUSY_RD) { + s_sendReadRequest; + p_popRequestQueue; + } + + transition(READY, WriteRequest, BUSY_WR) { + s_sendWriteRequest; + p_popRequestQueue; + } + + transition(BUSY_RD, Data, READY) { + d_dataCallback; + p_popResponseQueue; + } + + transition(BUSY_WR, Ack, READY) { + a_ackCallback; + p_popResponseQueue; + } +} diff --git a/src/mem/protocol/MOESI_CMP_token-msg.sm b/src/mem/protocol/MOESI_CMP_token-msg.sm index 2a75ce644..40c16b5e1 100644 --- a/src/mem/protocol/MOESI_CMP_token-msg.sm +++ b/src/mem/protocol/MOESI_CMP_token-msg.sm @@ -59,8 +59,10 @@ enumeration(CoherenceResponseType, desc="...") { // TriggerType enumeration(TriggerType, desc="...") { - REQUEST_TIMEOUT, desc="See corresponding event"; + REQUEST_TIMEOUT, desc="See corresponding event"; USE_TIMEOUT, desc="See corresponding event"; + DATA, desc="data for dma read response"; + DATA_ALL_TOKENS, desc="data and all tokens for dma write response"; } // TriggerMsg @@ -111,13 +113,45 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") { MessageSizeType MessageSize, desc="size category of the message"; } -GenericRequestType convertToGenericType(CoherenceRequestType type) { - if(type == CoherenceRequestType:GETS) { - return GenericRequestType:GETS; - } else if(type == CoherenceRequestType:GETX) { - return GenericRequestType:GETX; - } else { - DEBUG_EXPR(type); - error("invalid CoherenceRequestType"); - } +enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") { + READ, desc="Memory Read"; + WRITE, desc="Memory Write"; + NULL, desc="Invalid"; } + +enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") { + DATA, desc="DATA read"; + ACK, desc="ACK write"; + NULL, desc="Invalid"; +} + +structure(DMARequestMsg, desc="...", interface="NetworkMessage") { + DMARequestType Type, desc="Request type (read/write)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + MachineID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + int Len, desc="The length of the request"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +structure(DMAResponseMsg, desc="...", interface="NetworkMessage") { + DMAResponseType Type, desc="Response type (DATA/ACK)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +//GenericRequestType convertToGenericType(CoherenceRequestType type) { +// if(type == CoherenceRequestType:GETS) { +// return GenericRequestType:GETS; +// } else if(type == CoherenceRequestType:GETX) { +// return GenericRequestType:GETX; +// } else { +// DEBUG_EXPR(type); +// error("invalid CoherenceRequestType"); +// } +//} diff --git a/src/mem/protocol/MOESI_CMP_token.slicc b/src/mem/protocol/MOESI_CMP_token.slicc index ae4a6d6ec..a41226f90 100644 --- a/src/mem/protocol/MOESI_CMP_token.slicc +++ b/src/mem/protocol/MOESI_CMP_token.slicc @@ -2,4 +2,5 @@ MOESI_CMP_token-msg.sm MOESI_CMP_token-L1cache.sm MOESI_CMP_token-L2cache.sm MOESI_CMP_token-dir.sm +MOESI_CMP_token-dma.sm standard_CMP-protocol.sm diff --git a/src/mem/protocol/RubySlicc_Util.sm b/src/mem/protocol/RubySlicc_Util.sm index 312682bd7..e1771448f 100644 --- a/src/mem/protocol/RubySlicc_Util.sm +++ b/src/mem/protocol/RubySlicc_Util.sm @@ -52,7 +52,6 @@ void dirProfileCoherenceRequest(NodeID node, bool needCLB); bool isPerfectProtocol(); bool L1trainsPrefetcher(); int max_tokens(); -int N_tokens(); bool distributedPersistentEnabled(); Address setOffset(Address addr, int offset); Address makeLineAddress(Address addr); |