summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mem/protocol/MOESI_CMP_token-L1cache.sm353
-rw-r--r--src/mem/protocol/MOESI_CMP_token-L2cache.sm174
-rw-r--r--src/mem/protocol/MOESI_CMP_token-dir.sm866
-rw-r--r--src/mem/protocol/MOESI_CMP_token-dma.sm165
-rw-r--r--src/mem/protocol/MOESI_CMP_token-msg.sm54
-rw-r--r--src/mem/protocol/MOESI_CMP_token.slicc1
-rw-r--r--src/mem/protocol/RubySlicc_Util.sm1
-rw-r--r--src/mem/ruby/SConscript1
-rw-r--r--src/mem/ruby/common/NetDest.cc5
-rw-r--r--src/mem/ruby/common/NetDest.hh2
-rw-r--r--src/mem/ruby/config/MOESI_CMP_token.rb92
-rw-r--r--src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb55
-rw-r--r--src/mem/ruby/config/cfg.rb1
-rw-r--r--src/mem/ruby/config/defaults.rb27
-rw-r--r--src/mem/ruby/system/PersistentTable.cc82
-rw-r--r--src/mem/ruby/system/PersistentTable.hh40
-rw-r--r--src/mem/ruby/system/SConscript1
17 files changed, 1581 insertions, 339 deletions
diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm
index ab58c5c00..3fb4a8862 100644
--- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm
+++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm
@@ -32,21 +32,32 @@
*
*/
-machine(L1Cache, "Token protocol") {
+machine(L1Cache, "Token protocol")
+ : int l1_request_latency,
+ int l1_response_latency,
+ int l2_select_low_bit,
+ int l2_select_num_bits,
+ int N_tokens,
+ int retry_threshold,
+ int fixed_timeout_latency,
+ bool dynamic_timeout_enabled
+{
// From this node's L1 cache TO the network
- // a local L1 -> this L2 bank, currently ordered with directory forwarded requests
- MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false";
+
// a local L1 -> this L2 bank
- MessageBuffer responseFromL1Cache, network="To", virtual_network="2", ordered="false";
- MessageBuffer persistentFromL1Cache, network="To", virtual_network="3", ordered="true";
+ MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false";
+ MessageBuffer persistentFromL1Cache, network="To", virtual_network="2", ordered="true";
+ // a local L1 -> this L2 bank, currently ordered with directory forwarded requests
+ MessageBuffer requestFromL1Cache, network="To", virtual_network="4", ordered="false";
+
// To this node's L1 cache FROM the network
// a L2 bank -> this L1
- MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false";
+ MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false";
+ MessageBuffer persistentToL1Cache, network="From", virtual_network="2", ordered="true";
// a L2 bank -> this L1
- MessageBuffer responseToL1Cache, network="From", virtual_network="2", ordered="false";
- MessageBuffer persistentToL1Cache, network="From", virtual_network="3", ordered="true";
+ MessageBuffer requestToL1Cache, network="From", virtual_network="4", ordered="false";
// STATES
enumeration(State, desc="Cache states", default="L1Cache_State_I") {
@@ -111,10 +122,6 @@ machine(L1Cache, "Token protocol") {
// TYPES
- int getRetryThreshold();
- int getFixedTimeoutLatency();
- bool getDynamicTimeoutEnabled();
-
// CacheEntry
structure(Entry, desc="...", interface="AbstractCacheEntry") {
State CacheState, desc="cache state";
@@ -143,7 +150,7 @@ machine(L1Cache, "Token protocol") {
external_type(CacheMemory) {
bool cacheAvail(Address);
Address cacheProbe(Address);
- void allocate(Address);
+ void allocate(Address, Entry);
void deallocate(Address);
Entry lookup(Address);
void changePermission(Address, AccessPermission);
@@ -157,17 +164,28 @@ machine(L1Cache, "Token protocol") {
bool isPresent(Address);
}
+ external_type(PersistentTable) {
+ void persistentRequestLock(Address, MachineID, AccessType);
+ void persistentRequestUnlock(Address, MachineID);
+ bool okToIssueStarving(Address, MachineID);
+ MachineID findSmallest(Address);
+ AccessType typeOfSmallest(Address);
+ void markEntries(Address);
+ bool isLocked(Address);
+ int countStarvingForAddress(Address);
+ int countReadStarvingForAddress(Address);
+ }
TBETable L1_TBEs, template_hack="<L1Cache_TBE>";
- CacheMemory L1IcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1I"', abstract_chip_ptr="true";
- CacheMemory L1DcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1D"', abstract_chip_ptr="true";
+ CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])';
+ CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])';
MessageBuffer mandatoryQueue, ordered="false", abstract_chip_ptr="true";
- Sequencer sequencer, abstract_chip_ptr="true", constructor_hack="i";
+ Sequencer sequencer, factory='RubySystem::getSequencer(m_cfg["sequencer"])';
bool starving, default="false";
- PersistentTable persistentTable, constructor_hack="i";
+ PersistentTable persistentTable;
TimerTable useTimerTable;
TimerTable reissueTimerTable;
@@ -175,11 +193,11 @@ machine(L1Cache, "Token protocol") {
int outstandingPersistentRequests, default="0";
int averageLatencyHysteresis, default="(8)"; // Constant that provides hysteresis for calculated the estimated average
- int averageLatencyCounter, default="(500 << (*m_L1Cache_averageLatencyHysteresis_vec[i]))";
+ int averageLatencyCounter, default="(500 << (*m_L1Cache_averageLatencyHysteresis_ptr))";
int averageLatencyEstimate() {
DEBUG_EXPR( (averageLatencyCounter >> averageLatencyHysteresis) );
- profile_average_latency_estimate( (averageLatencyCounter >> averageLatencyHysteresis) );
+ //profile_average_latency_estimate( (averageLatencyCounter >> averageLatencyHysteresis) );
return averageLatencyCounter >> averageLatencyHysteresis;
}
@@ -366,30 +384,33 @@ machine(L1Cache, "Token protocol") {
}
}
- GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) {
- if (machineIDToMachineType(sender) == MachineType:L1Cache) {
- return GenericMachineType:L1Cache_wCC; // NOTE direct L1 hits should not call this
- } else if (machineIDToMachineType(sender) == MachineType:L2Cache) {
- if ( sender == (map_L1CacheMachId_to_L2Cache(addr,machineID))) {
- return GenericMachineType:L2Cache;
- } else {
- return GenericMachineType:L2Cache_wCC;
- }
- } else {
- return ConvertMachToGenericMach(machineIDToMachineType(sender));
- }
- }
-
- bool okToIssueStarving(Address addr) {
- return persistentTable.okToIssueStarving(addr);
+// GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) {
+// if (machineIDToMachineType(sender) == MachineType:L1Cache) {
+// return GenericMachineType:L1Cache_wCC; // NOTE direct L1 hits should not call this
+// } else if (machineIDToMachineType(sender) == MachineType:L2Cache) {
+//
+// if (sender == (mapAddressToRange(addr,
+// MachineType:L2Cache,
+// l2_select_low_bit,
+// l2_select_num_bits))) {
+//
+// return GenericMachineType:L2Cache;
+// } else {
+// return GenericMachineType:L2Cache_wCC;
+// }
+// } else {
+// return ConvertMachToGenericMach(machineIDToMachineType(sender));
+// }
+// }
+
+ bool okToIssueStarving(Address addr, MachineID machinID) {
+ return persistentTable.okToIssueStarving(addr, machineID);
}
void markPersistentEntries(Address addr) {
persistentTable.markEntries(addr);
}
- MessageBuffer triggerQueue, ordered="false", random="false";
-
// ** OUT_PORTS **
out_port(persistentNetwork_out, PersistentMsg, persistentFromL1Cache);
out_port(requestNetwork_out, RequestMsg, requestFromL1Cache);
@@ -507,7 +528,11 @@ machine(L1Cache, "Token protocol") {
// Mark TBE flag if response received off-chip. Use this to update average latency estimate
if ( in_msg.SenderMachine == MachineType:L2Cache ) {
- if (in_msg.Sender == map_L1CacheMachId_to_L2Cache(in_msg.Address, machineID)) {
+ if (in_msg.Sender == mapAddressToRange(in_msg.Address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits)) {
+
// came from an off-chip L2 cache
if (L1_TBEs.isPresent(in_msg.Address)) {
// L1_TBEs[in_msg.Address].ExternalResponse := true;
@@ -523,15 +548,15 @@ machine(L1Cache, "Token protocol") {
// profile_memory_response( in_msg.Address);
}
} else if ( in_msg.SenderMachine == MachineType:L1Cache) {
- if (isLocalProcessor(machineID, in_msg.Sender) == false) {
- if (L1_TBEs.isPresent(in_msg.Address)) {
+ //if (isLocalProcessor(machineID, in_msg.Sender) == false) {
+ //if (L1_TBEs.isPresent(in_msg.Address)) {
// L1_TBEs[in_msg.Address].ExternalResponse := true;
// profile_offchipL1_response(in_msg.Address );
- }
- }
- else {
+ //}
+ //}
+ //else {
// profile_onchipL1_response(in_msg.Address );
- }
+ //}
} else {
error("unexpected SenderMachine");
}
@@ -570,42 +595,42 @@ machine(L1Cache, "Token protocol") {
// ** INSTRUCTION ACCESS ***
// Check to see if it is in the OTHER L1
- if (L1DcacheMemory.isTagPresent(in_msg.Address)) {
+ if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) {
// The block is in the wrong L1, try to write it to the L2
- trigger(Event:L1_Replacement, in_msg.Address);
+ trigger(Event:L1_Replacement, in_msg.LineAddress);
}
- if (L1IcacheMemory.isTagPresent(in_msg.Address)) {
+ if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) {
// The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion
- trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address);
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
} else {
- if (L1IcacheMemory.cacheAvail(in_msg.Address)) {
+ if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
// L1 does't have the line, but we have space for it in the L1
- trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address);
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
} else {
// No room in the L1, so we need to make room
- trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.Address));
+ trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.LineAddress));
}
}
} else {
// *** DATA ACCESS ***
// Check to see if it is in the OTHER L1
- if (L1IcacheMemory.isTagPresent(in_msg.Address)) {
+ if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) {
// The block is in the wrong L1, try to write it to the L2
- trigger(Event:L1_Replacement, in_msg.Address);
+ trigger(Event:L1_Replacement, in_msg.LineAddress);
}
- if (L1DcacheMemory.isTagPresent(in_msg.Address)) {
+ if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) {
// The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion
- trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address);
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
} else {
- if (L1DcacheMemory.cacheAvail(in_msg.Address)) {
+ if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
// L1 does't have the line, but we have space for it in the L1
- trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address);
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
} else {
// No room in the L1, so we need to make room
- trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.Address));
+ trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.LineAddress));
}
}
}
@@ -618,19 +643,31 @@ machine(L1Cache, "Token protocol") {
action(a_issueReadRequest, "a", desc="Issue GETS") {
if (L1_TBEs[address].IssueCount == 0) {
// Update outstanding requests
- profile_outstanding_request(outstandingRequests);
+ //profile_outstanding_request(outstandingRequests);
outstandingRequests := outstandingRequests + 1;
}
- if (L1_TBEs[address].IssueCount >= getRetryThreshold() ) {
+ if (L1_TBEs[address].IssueCount >= retry_threshold) {
// Issue a persistent request if possible
- if (okToIssueStarving(address) && (starving == false)) {
- enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") {
+ if (okToIssueStarving(address, machineID) && (starving == false)) {
+ enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) {
out_msg.Address := address;
out_msg.Type := PersistentRequestType:GETS_PERSISTENT;
out_msg.Requestor := machineID;
out_msg.Destination.broadcast(MachineType:L1Cache);
- out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+ //
+ // Currently the configuration system limits the system to only one
+ // chip. Therefore, if we assume one shared L2 cache, then only one
+ // pertinent L2 cache exist.
+ //
+ //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
out_msg.Destination.add(map_Address_to_Directory(address));
out_msg.MessageSize := MessageSizeType:Persistent_Control;
out_msg.Prefetch := L1_TBEs[address].Prefetch;
@@ -640,11 +677,11 @@ machine(L1Cache, "Token protocol") {
starving := true;
if (L1_TBEs[address].IssueCount == 0) {
- profile_persistent_prediction(address, L1_TBEs[address].AccessType);
+ //profile_persistent_prediction(address, L1_TBEs[address].AccessType);
}
// Update outstanding requests
- profile_outstanding_persistent_request(outstandingPersistentRequests);
+ //profile_outstanding_persistent_request(outstandingPersistentRequests);
outstandingPersistentRequests := outstandingPersistentRequests + 1;
// Increment IssueCount
@@ -666,11 +703,16 @@ machine(L1Cache, "Token protocol") {
}
} else {
// Make a normal request
- enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:GETS;
out_msg.Requestor := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
out_msg.RetryNum := L1_TBEs[address].IssueCount;
if (L1_TBEs[address].IssueCount == 0) {
out_msg.MessageSize := MessageSizeType:Request_Control;
@@ -682,11 +724,18 @@ machine(L1Cache, "Token protocol") {
}
// send to other local L1s, with local bit set
- enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:GETS;
out_msg.Requestor := machineID;
- out_msg.Destination := getOtherLocalL1IDs(machineID);
+
+ //
+ // Since only one chip, assuming all L1 caches are local
+ //
+ //out_msg.Destination := getOtherLocalL1IDs(machineID);
+ out_msg.Destination.broadcast(MachineType:L1Cache);
+ out_msg.Destination.remove(machineID);
+
out_msg.RetryNum := L1_TBEs[address].IssueCount;
out_msg.isLocal := true;
if (L1_TBEs[address].IssueCount == 0) {
@@ -703,10 +752,10 @@ machine(L1Cache, "Token protocol") {
// Set a wakeup timer
- if (getDynamicTimeoutEnabled()) {
+ if (dynamic_timeout_enabled) {
reissueTimerTable.set(address, 1.25 * averageLatencyEstimate());
} else {
- reissueTimerTable.set(address, getFixedTimeoutLatency());
+ reissueTimerTable.set(address, fixed_timeout_latency);
}
}
@@ -716,20 +765,32 @@ machine(L1Cache, "Token protocol") {
if (L1_TBEs[address].IssueCount == 0) {
// Update outstanding requests
- profile_outstanding_request(outstandingRequests);
+ //profile_outstanding_request(outstandingRequests);
outstandingRequests := outstandingRequests + 1;
}
- if (L1_TBEs[address].IssueCount >= getRetryThreshold() ) {
+ if (L1_TBEs[address].IssueCount >= retry_threshold) {
// Issue a persistent request if possible
- if ( okToIssueStarving(address) && (starving == false)) {
- enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") {
+ if ( okToIssueStarving(address, machineID) && (starving == false)) {
+ enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) {
out_msg.Address := address;
out_msg.Type := PersistentRequestType:GETX_PERSISTENT;
out_msg.Requestor := machineID;
out_msg.RequestorMachine := MachineType:L1Cache;
out_msg.Destination.broadcast(MachineType:L1Cache);
- out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+ //
+ // Currently the configuration system limits the system to only one
+ // chip. Therefore, if we assume one shared L2 cache, then only one
+ // pertinent L2 cache exist.
+ //
+ //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
out_msg.Destination.add(map_Address_to_Directory(address));
out_msg.MessageSize := MessageSizeType:Persistent_Control;
out_msg.Prefetch := L1_TBEs[address].Prefetch;
@@ -739,11 +800,11 @@ machine(L1Cache, "Token protocol") {
starving := true;
// Update outstanding requests
- profile_outstanding_persistent_request(outstandingPersistentRequests);
+ //profile_outstanding_persistent_request(outstandingPersistentRequests);
outstandingPersistentRequests := outstandingPersistentRequests + 1;
if (L1_TBEs[address].IssueCount == 0) {
- profile_persistent_prediction(address, L1_TBEs[address].AccessType);
+ //profile_persistent_prediction(address, L1_TBEs[address].AccessType);
}
// Increment IssueCount
@@ -766,12 +827,17 @@ machine(L1Cache, "Token protocol") {
} else {
// Make a normal request
- enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:GETX;
out_msg.Requestor := machineID;
out_msg.RequestorMachine := MachineType:L1Cache;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
out_msg.RetryNum := L1_TBEs[address].IssueCount;
if (L1_TBEs[address].IssueCount == 0) {
@@ -784,12 +850,19 @@ machine(L1Cache, "Token protocol") {
}
// send to other local L1s too
- enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:GETX;
out_msg.Requestor := machineID;
out_msg.isLocal := true;
- out_msg.Destination := getOtherLocalL1IDs(machineID);
+
+ //
+ // Since only one chip, assuming all L1 caches are local
+ //
+ //out_msg.Destination := getOtherLocalL1IDs(machineID);
+ out_msg.Destination.broadcast(MachineType:L1Cache);
+ out_msg.Destination.remove(machineID);
+
out_msg.RetryNum := L1_TBEs[address].IssueCount;
if (L1_TBEs[address].IssueCount == 0) {
out_msg.MessageSize := MessageSizeType:Request_Control;
@@ -807,10 +880,10 @@ machine(L1Cache, "Token protocol") {
DEBUG_EXPR(L1_TBEs[address].IssueCount);
// Set a wakeup timer
- if (getDynamicTimeoutEnabled()) {
+ if (dynamic_timeout_enabled) {
reissueTimerTable.set(address, 1.25 * averageLatencyEstimate());
} else {
- reissueTimerTable.set(address, getFixedTimeoutLatency());
+ reissueTimerTable.set(address, fixed_timeout_latency);
}
}
}
@@ -818,7 +891,7 @@ machine(L1Cache, "Token protocol") {
action(bb_bounceResponse, "\b", desc="Bounce tokens and data to memory") {
peek(responseNetwork_in, ResponseMsg) {
// FIXME, should use a 3rd vnet
- enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := in_msg.Type;
out_msg.Sender := machineID;
@@ -833,11 +906,16 @@ machine(L1Cache, "Token protocol") {
}
action(c_ownedReplacement, "c", desc="Issue writeback") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L1Cache;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
out_msg.Tokens := getCacheEntry(address).Tokens;
out_msg.DataBlk := getCacheEntry(address).DataBlk;
out_msg.Dirty := getCacheEntry(address).Dirty;
@@ -853,11 +931,16 @@ machine(L1Cache, "Token protocol") {
// don't send writeback if replacing block with no tokens
if (getCacheEntry(address).Tokens != 0) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L1Cache;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
out_msg.Tokens := getCacheEntry(address).Tokens;
out_msg.DataBlk := getCacheEntry(address).DataBlk;
// assert(getCacheEntry(address).Dirty == false);
@@ -879,7 +962,7 @@ machine(L1Cache, "Token protocol") {
action(d_sendDataWithToken, "d", desc="Send data and a token from cache to requestor") {
peek(requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_SHARED;
out_msg.Sender := machineID;
@@ -902,14 +985,14 @@ machine(L1Cache, "Token protocol") {
action(d_sendDataWithNTokenIfAvail, "\dd", desc="Send data and a token from cache to requestor") {
peek(requestNetwork_in, RequestMsg) {
- if (getCacheEntry(address).Tokens > N_tokens()) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ if (getCacheEntry(address).Tokens > N_tokens) {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_SHARED;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L1Cache;
out_msg.Destination.add(in_msg.Requestor);
- out_msg.Tokens := N_tokens();
+ out_msg.Tokens := N_tokens;
out_msg.DataBlk := getCacheEntry(address).DataBlk;
// out_msg.Dirty := getCacheEntry(address).Dirty;
out_msg.Dirty := false;
@@ -919,10 +1002,10 @@ machine(L1Cache, "Token protocol") {
out_msg.MessageSize := MessageSizeType:Response_Data;
}
}
- getCacheEntry(address).Tokens := getCacheEntry(address).Tokens - N_tokens();
+ getCacheEntry(address).Tokens := getCacheEntry(address).Tokens - N_tokens;
}
else if (getCacheEntry(address).Tokens > 1) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_SHARED;
out_msg.Sender := machineID;
@@ -946,7 +1029,7 @@ machine(L1Cache, "Token protocol") {
action(dd_sendDataWithAllTokens, "\d", desc="Send data and all tokens from cache to requestor") {
peek(requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
@@ -969,7 +1052,7 @@ machine(L1Cache, "Token protocol") {
action(e_sendAckWithCollectedTokens, "e", desc="Send ack with the tokens we've collected thus far.") {
// assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself
if (getCacheEntry(address).Tokens > 0) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -986,7 +1069,7 @@ machine(L1Cache, "Token protocol") {
action(ee_sendDataWithAllTokens, "\e", desc="Send data and all tokens from cache to starver") {
//assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself
assert(getCacheEntry(address).Tokens > 0);
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
@@ -1005,23 +1088,23 @@ machine(L1Cache, "Token protocol") {
//assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself
assert(getCacheEntry(address).Tokens > 0);
if (getCacheEntry(address).Tokens > 1) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L1Cache;
out_msg.Destination.add(persistentTable.findSmallest(address));
assert(getCacheEntry(address).Tokens >= 1);
- if (getCacheEntry(address).Tokens > N_tokens()) {
- out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens();
+ if (getCacheEntry(address).Tokens > N_tokens) {
+ out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens;
} else {
out_msg.Tokens := getCacheEntry(address).Tokens - 1;
}
out_msg.MessageSize := MessageSizeType:Response_Control;
}
}
- if (getCacheEntry(address).Tokens > N_tokens()) {
- getCacheEntry(address).Tokens := N_tokens();
+ if (getCacheEntry(address).Tokens > N_tokens) {
+ getCacheEntry(address).Tokens := N_tokens;
} else {
getCacheEntry(address).Tokens := 1;
}
@@ -1031,15 +1114,15 @@ machine(L1Cache, "Token protocol") {
//assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself
assert(getCacheEntry(address).Tokens > 0);
if (getCacheEntry(address).Tokens > 1) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L1Cache;
out_msg.Destination.add(persistentTable.findSmallest(address));
assert(getCacheEntry(address).Tokens >= 1);
- if (getCacheEntry(address).Tokens > N_tokens()) {
- out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens();
+ if (getCacheEntry(address).Tokens > N_tokens) {
+ out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens;
} else {
out_msg.Tokens := getCacheEntry(address).Tokens - 1;
}
@@ -1047,8 +1130,8 @@ machine(L1Cache, "Token protocol") {
out_msg.Dirty := getCacheEntry(address).Dirty;
out_msg.MessageSize := MessageSizeType:Response_Data;
}
- if (getCacheEntry(address).Tokens > N_tokens()) {
- getCacheEntry(address).Tokens := N_tokens();
+ if (getCacheEntry(address).Tokens > N_tokens) {
+ getCacheEntry(address).Tokens := N_tokens;
} else {
getCacheEntry(address).Tokens := 1;
}
@@ -1061,7 +1144,7 @@ machine(L1Cache, "Token protocol") {
peek(responseNetwork_in, ResponseMsg) {
// assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself
// FIXME, should use a 3rd vnet in some cases
- enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := in_msg.Type;
out_msg.Sender := machineID;
@@ -1079,7 +1162,8 @@ machine(L1Cache, "Token protocol") {
action(h_load_hit, "h", desc="Notify sequencer the load completed.") {
DEBUG_EXPR(address);
DEBUG_EXPR(getCacheEntry(address).DataBlk);
- sequencer.readCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No);
+ //sequencer.readCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No);
+ sequencer.readCallback(address, getCacheEntry(address).DataBlk);
}
action(x_external_load_hit, "x", desc="Notify sequencer the load completed.") {
@@ -1087,14 +1171,16 @@ machine(L1Cache, "Token protocol") {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
peek(responseNetwork_in, ResponseMsg) {
- sequencer.readCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No);
+ //sequencer.readCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No);
+ sequencer.readCallback(address, getCacheEntry(address).DataBlk);
}
}
action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") {
DEBUG_EXPR(address);
DEBUG_EXPR(getCacheEntry(address).DataBlk);
- sequencer.writeCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No);
+ //sequencer.writeCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No);
+ sequencer.writeCallback(address, getCacheEntry(address).DataBlk);
getCacheEntry(address).Dirty := true;
DEBUG_EXPR(getCacheEntry(address).DataBlk);
}
@@ -1103,7 +1189,8 @@ machine(L1Cache, "Token protocol") {
DEBUG_EXPR(address);
DEBUG_EXPR(getCacheEntry(address).DataBlk);
peek(responseNetwork_in, ResponseMsg) {
- sequencer.writeCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No);
+ //sequencer.writeCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No);
+ sequencer.writeCallback(address, getCacheEntry(address).DataBlk);
}
getCacheEntry(address).Dirty := true;
DEBUG_EXPR(getCacheEntry(address).DataBlk);
@@ -1133,8 +1220,6 @@ machine(L1Cache, "Token protocol") {
useTimerTable.unset(address);
}
-
-
action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") {
mandatoryQueue_in.dequeue();
}
@@ -1156,14 +1241,19 @@ machine(L1Cache, "Token protocol") {
}
action(p_informL2AboutTokenLoss, "p", desc="Inform L2 about loss of all tokens") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:INV;
out_msg.Tokens := 0;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L1Cache;
out_msg.DestMachine := MachineType:L2Cache;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
out_msg.MessageSize := MessageSizeType:Response_Control;
}
}
@@ -1189,13 +1279,25 @@ machine(L1Cache, "Token protocol") {
if (L1_TBEs[address].WentPersistent) {
// assert(starving == true);
outstandingRequests := outstandingRequests - 1;
- enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) {
out_msg.Address := address;
out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT;
out_msg.Requestor := machineID;
out_msg.RequestorMachine := MachineType:L1Cache;
out_msg.Destination.broadcast(MachineType:L1Cache);
- out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+ //
+ // Currently the configuration system limits the system to only one
+ // chip. Therefore, if we assume one shared L2 cache, then only one
+ // pertinent L2 cache exist.
+ //
+ //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
out_msg.Destination.add(map_Address_to_Directory(address));
out_msg.MessageSize := MessageSizeType:Persistent_Control;
}
@@ -1217,14 +1319,14 @@ machine(L1Cache, "Token protocol") {
// profile_token_retry(address, L1_TBEs[address].AccessType, 1);
//}
- profile_token_retry(address, L1_TBEs[address].AccessType, L1_TBEs[address].IssueCount);
+ //profile_token_retry(address, L1_TBEs[address].AccessType, L1_TBEs[address].IssueCount);
L1_TBEs.deallocate(address);
}
action(t_sendAckWithCollectedTokens, "t", desc="Send ack with the tokens we've collected thus far.") {
if (getCacheEntry(address).Tokens > 0) {
peek(requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -1259,13 +1361,13 @@ machine(L1Cache, "Token protocol") {
action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") {
if (L1DcacheMemory.isTagPresent(address) == false) {
- L1DcacheMemory.allocate(address);
+ L1DcacheMemory.allocate(address, new Entry);
}
}
action(pp_allocateL1ICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") {
if (L1IcacheMemory.isTagPresent(address) == false) {
- L1IcacheMemory.allocate(address);
+ L1IcacheMemory.allocate(address, new Entry);
}
}
@@ -1281,11 +1383,6 @@ machine(L1Cache, "Token protocol") {
}
}
-
- action(z_stall, "z", desc="Stall") {
-
- }
-
action(zz_recycleMandatoryQueue, "\z", desc="Send the head of the mandatory queue to the back of the queue.") {
mandatoryQueue_in.recycle();
}
diff --git a/src/mem/protocol/MOESI_CMP_token-L2cache.sm b/src/mem/protocol/MOESI_CMP_token-L2cache.sm
index 0a58ed5cf..9a5c400f2 100644
--- a/src/mem/protocol/MOESI_CMP_token-L2cache.sm
+++ b/src/mem/protocol/MOESI_CMP_token-L2cache.sm
@@ -32,20 +32,33 @@
*
*/
-machine(L2Cache, "Token protocol") {
+machine(L2Cache, "Token protocol")
+ : int l2_request_latency,
+ int l2_response_latency,
+ int N_tokens,
+ bool filtering_enabled
+{
// L2 BANK QUEUES
// From local bank of L2 cache TO the network
- MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="0", ordered="false"; // this L2 bank -> a local L1
- MessageBuffer GlobalRequestFromL2Cache, network="To", virtual_network="1", ordered="false"; // this L2 bank -> mod-directory
- MessageBuffer responseFromL2Cache, network="To", virtual_network="2", ordered="false"; // this L2 bank -> a local L1 || mod-directory
+
+ // this L2 bank -> a local L1 || mod-directory
+ MessageBuffer responseFromL2Cache, network="To", virtual_network="1", ordered="false";
+ // this L2 bank -> mod-directory
+ MessageBuffer GlobalRequestFromL2Cache, network="To", virtual_network="3", ordered="false";
+ // this L2 bank -> a local L1
+ MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="4", ordered="false";
// FROM the network to this local bank of L2 cache
- MessageBuffer L1RequestToL2Cache, network="From", virtual_network="0", ordered="false"; // a local L1 -> this L2 bank
- MessageBuffer GlobalRequestToL2Cache, network="From", virtual_network="1", ordered="false"; // mod-directory -> this L2 bank
- MessageBuffer responseToL2Cache, network="From", virtual_network="2", ordered="false"; // a local L1 || mod-directory -> this L2 bank
- MessageBuffer persistentToL2Cache, network="From", virtual_network="3", ordered="true";
+
+ // a local L1 || mod-directory -> this L2 bank
+ MessageBuffer responseToL2Cache, network="From", virtual_network="1", ordered="false";
+ MessageBuffer persistentToL2Cache, network="From", virtual_network="2", ordered="true";
+ // mod-directory -> this L2 bank
+ MessageBuffer GlobalRequestToL2Cache, network="From", virtual_network="3", ordered="false";
+ // a local L1 -> this L2 bank
+ MessageBuffer L1RequestToL2Cache, network="From", virtual_network="4", ordered="false";
// STATES
enumeration(State, desc="L2 Cache states", default="L2Cache_State_I") {
@@ -107,8 +120,6 @@ machine(L2Cache, "Token protocol") {
DataBlock DataBlk, desc="data for the block";
}
-
-
structure(DirEntry, desc="...") {
Set Sharers, desc="Set of the internal processors that want the block in shared state";
bool exclusive, default="false", desc="if local exclusive is likely";
@@ -117,7 +128,7 @@ machine(L2Cache, "Token protocol") {
external_type(CacheMemory) {
bool cacheAvail(Address);
Address cacheProbe(Address);
- void allocate(Address);
+ void allocate(Address, Entry);
void deallocate(Address);
Entry lookup(Address);
void changePermission(Address, AccessPermission);
@@ -132,19 +143,28 @@ machine(L2Cache, "Token protocol") {
bool isTagPresent(Address);
}
+ external_type(PersistentTable) {
+ void persistentRequestLock(Address, MachineID, AccessType);
+ void persistentRequestUnlock(Address, MachineID);
+ MachineID findSmallest(Address);
+ AccessType typeOfSmallest(Address);
+ void markEntries(Address);
+ bool isLocked(Address);
+ int countStarvingForAddress(Address);
+ int countReadStarvingForAddress(Address);
+ }
- CacheMemory L2cacheMemory, template_hack="<L2Cache_Entry>", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,MachineType_L2Cache,int_to_string(i)+"_L2"';
+ CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])';
- PersistentTable persistentTable, constructor_hack="i";
+ PersistentTable persistentTable;
PerfectCacheMemory localDirectory, template_hack="<L2Cache_DirEntry>";
-
- bool getFilteringEnabled();
-
Entry getL2CacheEntry(Address addr), return_by_ref="yes" {
if (L2cacheMemory.isTagPresent(addr)) {
return L2cacheMemory[addr];
}
+ assert(false);
+ return L2cacheMemory[addr];
}
int getTokens(Address addr) {
@@ -465,15 +485,21 @@ machine(L2Cache, "Token protocol") {
// if this is a retry or no local sharers, broadcast normally
// if (in_msg.RetryNum > 0 || (in_msg.Type == CoherenceRequestType:GETX && exclusiveExists(in_msg.Address) == false) || (in_msg.Type == CoherenceRequestType:GETS && sharersExist(in_msg.Address) == false)) {
- enqueue(globalRequestNetwork_out, RequestMsg, latency="L2_REQUEST_LATENCY") {
+ enqueue(globalRequestNetwork_out, RequestMsg, latency=l2_request_latency) {
out_msg.Address := in_msg.Address;
out_msg.Type := in_msg.Type;
out_msg.Requestor := in_msg.Requestor;
out_msg.RequestorMachine := in_msg.RequestorMachine;
- //out_msg.Destination.broadcast(MachineType:L2Cache);
out_msg.RetryNum := in_msg.RetryNum;
- out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
- out_msg.Destination.remove(map_L1CacheMachId_to_L2Cache(address, in_msg.Requestor));
+
+ //
+ // If a statically shared L2 cache, then no other L2 caches can
+ // store the block
+ //
+ //out_msg.Destination.broadcast(MachineType:L2Cache);
+ //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+ //out_msg.Destination.remove(map_L1CacheMachId_to_L2Cache(address, in_msg.Requestor));
+
out_msg.Destination.add(map_Address_to_Directory(address));
out_msg.MessageSize := MessageSizeType:Request_Control;
out_msg.AccessMode := in_msg.AccessMode;
@@ -489,7 +515,7 @@ machine(L2Cache, "Token protocol") {
action(bb_bounceResponse, "\b", desc="Bounce tokens and data to memory") {
peek(responseNetwork_in, ResponseMsg) {
// FIXME, should use a 3rd vnet
- enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := in_msg.Type;
out_msg.Sender := machineID;
@@ -505,7 +531,7 @@ machine(L2Cache, "Token protocol") {
action(c_cleanReplacement, "c", desc="Issue clean writeback") {
if (getL2CacheEntry(address).Tokens > 0) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -519,7 +545,7 @@ machine(L2Cache, "Token protocol") {
}
action(cc_dirtyReplacement, "\c", desc="Issue dirty writeback") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L2Cache;
@@ -541,22 +567,22 @@ machine(L2Cache, "Token protocol") {
action(d_sendDataWithTokens, "d", desc="Send data and a token from cache to requestor") {
peek(requestNetwork_in, RequestMsg) {
- if (getL2CacheEntry(address).Tokens > N_tokens()) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ if (getL2CacheEntry(address).Tokens > N_tokens) {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_SHARED;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L2Cache;
out_msg.Destination.add(in_msg.Requestor);
- out_msg.Tokens := N_tokens();
+ out_msg.Tokens := N_tokens;
out_msg.DataBlk := getL2CacheEntry(address).DataBlk;
out_msg.Dirty := false;
out_msg.MessageSize := MessageSizeType:Response_Data;
}
- getL2CacheEntry(address).Tokens := getL2CacheEntry(address).Tokens - N_tokens();
+ getL2CacheEntry(address).Tokens := getL2CacheEntry(address).Tokens - N_tokens;
}
else {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_SHARED;
out_msg.Sender := machineID;
@@ -574,7 +600,7 @@ machine(L2Cache, "Token protocol") {
action(dd_sendDataWithAllTokens, "\d", desc="Send data and all tokens from cache to requestor") {
peek(requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
@@ -592,7 +618,7 @@ machine(L2Cache, "Token protocol") {
action(e_sendAckWithCollectedTokens, "e", desc="Send ack with the tokens we've collected thus far.") {
if (getL2CacheEntry(address).Tokens > 0) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -607,7 +633,7 @@ machine(L2Cache, "Token protocol") {
}
action(ee_sendDataWithAllTokens, "\e", desc="Send data and all tokens from cache to starver") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
@@ -626,7 +652,7 @@ machine(L2Cache, "Token protocol") {
//assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself
assert(getL2CacheEntry(address).Tokens > 0);
if (getL2CacheEntry(address).Tokens > 1) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -644,7 +670,7 @@ machine(L2Cache, "Token protocol") {
//assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself
assert(getL2CacheEntry(address).Tokens > 0);
if (getL2CacheEntry(address).Tokens > 1) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
@@ -666,7 +692,7 @@ machine(L2Cache, "Token protocol") {
// assert(persistentTable.isLocked(address));
peek(responseNetwork_in, ResponseMsg) {
// FIXME, should use a 3rd vnet in some cases
- enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := in_msg.Type;
out_msg.Sender := machineID;
@@ -684,7 +710,7 @@ machine(L2Cache, "Token protocol") {
//assert(persistentTable.isLocked(address));
peek(responseNetwork_in, ResponseMsg) {
// FIXME, should use a 3rd vnet in some cases
- enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
if (in_msg.Type == CoherenceResponseType:WB_SHARED_DATA) {
out_msg.Type := CoherenceResponseType:DATA_SHARED;
@@ -706,7 +732,7 @@ machine(L2Cache, "Token protocol") {
// assert(persistentTable.isLocked(address));
peek(responseNetwork_in, ResponseMsg) {
// FIXME, should use a 3rd vnet in some cases
- enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
@@ -729,24 +755,31 @@ machine(L2Cache, "Token protocol") {
action(j_forwardTransientRequestToLocalSharers, "j", desc="Forward external transient request to local sharers") {
peek(requestNetwork_in, RequestMsg) {
- if (getFilteringEnabled() == true && in_msg.RetryNum == 0 && sharersExist(in_msg.Address) == false) {
- profile_filter_action(1);
+ if (filtering_enabled == true && in_msg.RetryNum == 0 && sharersExist(in_msg.Address) == false) {
+ //profile_filter_action(1);
DEBUG_EXPR("filtered message");
DEBUG_EXPR(in_msg.RetryNum);
}
else {
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue(localRequestNetwork_out, RequestMsg, latency=l2_response_latency ) {
out_msg.Address := in_msg.Address;
out_msg.Requestor := in_msg.Requestor;
out_msg.RequestorMachine := in_msg.RequestorMachine;
- out_msg.Destination := getLocalL1IDs(machineID);
+
+ //
+ // Currently assuming only one chip so all L1s are local
+ //
+ //out_msg.Destination := getLocalL1IDs(machineID);
+ out_msg.Destination.broadcast(MachineType:L1Cache);
+ out_msg.Destination.remove(in_msg.Requestor);
+
out_msg.Type := in_msg.Type;
out_msg.isLocal := false;
out_msg.MessageSize := MessageSizeType:Request_Control;
out_msg.AccessMode := in_msg.AccessMode;
out_msg.Prefetch := in_msg.Prefetch;
}
- profile_filter_action(0);
+ //profile_filter_action(0);
}
}
}
@@ -756,7 +789,7 @@ machine(L2Cache, "Token protocol") {
peek(L1requestNetwork_in, RequestMsg) {
assert(getL2CacheEntry(address).Tokens > 0);
//enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_to_L1_RESPONSE_LATENCY") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_SHARED;
out_msg.Sender := machineID;
@@ -774,7 +807,7 @@ machine(L2Cache, "Token protocol") {
action(k_dataOwnerFromL2CacheToL1Requestor, "\k", desc="Send data and a token from cache to L1 requestor") {
peek(L1requestNetwork_in, RequestMsg) {
assert(getL2CacheEntry(address).Tokens > 0);
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
@@ -793,7 +826,7 @@ machine(L2Cache, "Token protocol") {
peek(L1requestNetwork_in, RequestMsg) {
// assert(getL2CacheEntry(address).Tokens == max_tokens());
//enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_to_L1_RESPONSE_LATENCY") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
@@ -840,12 +873,13 @@ machine(L2Cache, "Token protocol") {
}
action(r_markNewSharer, "r", desc="Mark the new local sharer from local request message") {
-
peek(L1requestNetwork_in, RequestMsg) {
- if (in_msg.Type == CoherenceRequestType:GETX) {
- setNewWriter(in_msg.Address, machineIDToNodeID(in_msg.Requestor));
- } else if (in_msg.Type == CoherenceRequestType:GETS) {
- addNewSharer(in_msg.Address, machineIDToNodeID(in_msg.Requestor));
+ if (machineIDToMachineType(in_msg.Requestor) == MachineType:L1Cache) {
+ if (in_msg.Type == CoherenceRequestType:GETX) {
+ setNewWriter(in_msg.Address, machineIDToNodeID(in_msg.Requestor));
+ } else if (in_msg.Type == CoherenceRequestType:GETS) {
+ addNewSharer(in_msg.Address, machineIDToNodeID(in_msg.Requestor));
+ }
}
}
}
@@ -854,16 +888,19 @@ machine(L2Cache, "Token protocol") {
clearExclusiveBitIfExists(address);
}
- action( r_setMRU, "\rr", desc="manually set the MRU bit for cache line" ) {
- if(isCacheTagPresent(address)) {
- L2cacheMemory.setMRU(address);
+ action(r_setMRU, "\rr", desc="manually set the MRU bit for cache line" ) {
+ peek(L1requestNetwork_in, RequestMsg) {
+ if ((machineIDToMachineType(in_msg.Requestor) == MachineType:L1Cache) &&
+ (isCacheTagPresent(address))) {
+ L2cacheMemory.setMRU(address);
+ }
}
}
action(t_sendAckWithCollectedTokens, "t", desc="Send ack with the tokens we've collected thus far.") {
if (getL2CacheEntry(address).Tokens > 0) {
peek(requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -881,7 +918,7 @@ machine(L2Cache, "Token protocol") {
action(tt_sendLocalAckWithCollectedTokens, "tt", desc="Send ack with the tokens we've collected thus far.") {
if (getL2CacheEntry(address).Tokens > 0) {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -906,19 +943,19 @@ machine(L2Cache, "Token protocol") {
}
action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") {
- L2cacheMemory.allocate(address);
+ L2cacheMemory.allocate(address, new Entry);
}
action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") {
L2cacheMemory.deallocate(address);
}
- action(uu_profileMiss, "\u", desc="Profile the demand miss") {
- peek(L1requestNetwork_in, RequestMsg) {
+ //action(uu_profileMiss, "\u", desc="Profile the demand miss") {
+ // peek(L1requestNetwork_in, RequestMsg) {
// AccessModeType not implemented
//profile_L2Cache_miss(convertToGenericType(in_msg.Type), in_msg.AccessMode, MessageSizeTypeToInt(in_msg.MessageSize), in_msg.Prefetch, machineIDToNodeID(in_msg.Requestor));
- }
- }
+ // }
+ //}
action(w_assertIncomingDataAndCacheDataMatch, "w", desc="Assert that the incoming data and the data in the cache match") {
@@ -927,11 +964,6 @@ machine(L2Cache, "Token protocol") {
}
}
- action(z_stall, "z", desc="Stall") {
- }
-
-
-
//*****************************************************
// TRANSITIONS
@@ -961,7 +993,7 @@ machine(L2Cache, "Token protocol") {
transition(NP, {L1_GETS, L1_GETX}) {
a_broadcastLocalRequest;
r_markNewSharer;
- uu_profileMiss;
+ //uu_profileMiss;
o_popL1RequestQueue;
}
@@ -1012,7 +1044,7 @@ machine(L2Cache, "Token protocol") {
a_broadcastLocalRequest;
tt_sendLocalAckWithCollectedTokens; // send any tokens we have collected
r_markNewSharer;
- uu_profileMiss;
+ //uu_profileMiss;
o_popL1RequestQueue;
}
@@ -1020,7 +1052,7 @@ machine(L2Cache, "Token protocol") {
a_broadcastLocalRequest;
tt_sendLocalAckWithCollectedTokens; // send any tokens we have collected
r_markNewSharer;
- uu_profileMiss;
+ //uu_profileMiss;
o_popL1RequestQueue;
}
@@ -1181,7 +1213,7 @@ machine(L2Cache, "Token protocol") {
tt_sendLocalAckWithCollectedTokens;
r_markNewSharer;
r_setMRU;
- uu_profileMiss;
+ //uu_profileMiss;
o_popL1RequestQueue;
}
@@ -1294,7 +1326,7 @@ machine(L2Cache, "Token protocol") {
k_dataAndAllTokensFromL2CacheToL1Requestor;
r_markNewSharer;
r_setMRU;
- uu_profileMiss;
+ //uu_profileMiss;
o_popL1RequestQueue;
}
@@ -1382,7 +1414,7 @@ machine(L2Cache, "Token protocol") {
transition(I_L, {L1_GETX, L1_GETS}) {
a_broadcastLocalRequest;
r_markNewSharer;
- uu_profileMiss;
+ //uu_profileMiss;
o_popL1RequestQueue;
}
@@ -1391,7 +1423,7 @@ machine(L2Cache, "Token protocol") {
tt_sendLocalAckWithCollectedTokens;
r_markNewSharer;
r_setMRU;
- uu_profileMiss;
+ //uu_profileMiss;
o_popL1RequestQueue;
}
diff --git a/src/mem/protocol/MOESI_CMP_token-dir.sm b/src/mem/protocol/MOESI_CMP_token-dir.sm
index 1592fd123..7925a8fe0 100644
--- a/src/mem/protocol/MOESI_CMP_token-dir.sm
+++ b/src/mem/protocol/MOESI_CMP_token-dir.sm
@@ -32,14 +32,23 @@
*/
-machine(Directory, "Token protocol") {
-
- MessageBuffer requestFromDir, network="To", virtual_network="1", ordered="false";
- MessageBuffer responseFromDir, network="To", virtual_network="2", ordered="false";
-
- MessageBuffer persistentToDir, network="From", virtual_network="3", ordered="true";
- MessageBuffer requestToDir, network="From", virtual_network="1", ordered="false";
- MessageBuffer responseToDir, network="From", virtual_network="2", ordered="false";
+machine(Directory, "Token protocol")
+ : int directory_latency,
+ int l2_select_low_bit,
+ int l2_select_num_bits,
+ bool distributed_persistent,
+ int fixed_timeout_latency
+{
+
+ MessageBuffer dmaResponseFromDir, network="To", virtual_network="0", ordered="true";
+ MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false";
+ MessageBuffer persistentFromDir, network="To", virtual_network="2", ordered="true";
+ MessageBuffer requestFromDir, network="To", virtual_network="4", ordered="false";
+
+ MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false";
+ MessageBuffer persistentToDir, network="From", virtual_network="2", ordered="true";
+ MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false";
+ MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true";
// STATES
enumeration(State, desc="Directory states", default="Directory_State_O") {
@@ -47,6 +56,24 @@ machine(Directory, "Token protocol") {
O, desc="Owner";
NO, desc="Not Owner";
L, desc="Locked";
+
+ // Memory wait states - can block all messages including persistent requests
+ O_W, desc="transitioning to Owner, waiting for memory write";
+ L_W, desc="transitioning to Locked, waiting for memory read";
+ DR_L_W, desc="transitioning to Locked underneath a DMA read, waiting for memory data";
+ NO_W, desc="transitioning to Not Owner, waiting for memory read";
+ O_DW_W, desc="transitioning to Owner, waiting for memory before DMA ack";
+ O_DR_W, desc="transitioning to Owner, waiting for memory before DMA data";
+
+ // DMA request transient states - must respond to persistent requests
+ O_DW, desc="issued GETX for DMA write, waiting for all tokens";
+ NO_DW, desc="issued GETX for DMA write, waiting for all tokens";
+ NO_DR, desc="issued GETS for DMA read, waiting for data";
+
+ // DMA request in progress - competing with a CPU persistent request
+ DW_L, desc="issued GETX for DMA write, CPU persistent request must complete first";
+ DR_L, desc="issued GETS for DMA read, CPU persistent request must complete first";
+
}
// Events
@@ -55,9 +82,23 @@ machine(Directory, "Token protocol") {
GETS, desc="A GETS arrives";
Lockdown, desc="A lockdown request arrives";
Unlockdown, desc="An un-lockdown request arrives";
+ Own_Lock_or_Unlock, desc="own lock or unlock";
Data_Owner, desc="Data arrive";
+ Data_All_Tokens, desc="Data and all tokens";
Ack_Owner, desc="Owner token arrived without data because it was clean";
+ Ack_Owner_All_Tokens, desc="All tokens including owner arrived without data because it was clean";
Tokens, desc="Tokens arrive";
+ Ack_All_Tokens, desc="All_Tokens arrive";
+ Request_Timeout, desc="A DMA request has timed out";
+
+ // Memory Controller
+ Memory_Data, desc="Fetched data from memory arrives";
+ Memory_Ack, desc="Writeback Ack from memory arrives";
+
+ // DMA requests
+ DMA_READ, desc="A DMA Read memory request";
+ DMA_WRITE, desc="A DMA Write memory request";
+ DMA_WRITE_All_Tokens, desc="A DMA Write memory request, directory has all tokens";
}
// TYPES
@@ -73,7 +114,7 @@ machine(Directory, "Token protocol") {
// is 'soft state' that does not need to be correct (as long as
// you're eventually willing to resort to broadcast.)
- Set Owner, desc="Probable Owner of the line. More accurately, the set of processors who need to see a GetS or GetO. We use a Set for convenience, but only one bit is set at a time.";
+ Set Owner, desc="Probable Owner of the line. More accurately, the set of processors who need to see a GetS or GetO. We use a Set for convenience, but only one bit is set at a time.";
Set Sharers, desc="Probable sharers of the line. More accurately, the set of processors who need to see a GetX";
}
@@ -82,23 +123,70 @@ machine(Directory, "Token protocol") {
bool isPresent(Address);
}
+ external_type(MemoryControl, inport="yes", outport="yes") {
+
+ }
+
+ external_type(PersistentTable) {
+ void persistentRequestLock(Address, MachineID, AccessType);
+ void persistentRequestUnlock(Address, MachineID);
+ bool okToIssueStarving(Address, MachineID);
+ MachineID findSmallest(Address);
+ AccessType typeOfSmallest(Address);
+ void markEntries(Address);
+ bool isLocked(Address);
+ int countStarvingForAddress(Address);
+ int countReadStarvingForAddress(Address);
+ }
+
+ // TBE entries for DMA requests
+ structure(TBE, desc="TBE entries for outstanding DMA requests") {
+ Address PhysicalAddress, desc="physical address";
+ State TBEState, desc="Transient State";
+ DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory";
+ DataBlock DataBlk, desc="The current view of system memory";
+ int Len, desc="...";
+ MachineID DmaRequestor, desc="DMA requestor";
+ bool WentPersistent, desc="Did the DMA request require a persistent request";
+ }
+
+ external_type(TBETable) {
+ TBE lookup(Address);
+ void allocate(Address);
+ void deallocate(Address);
+ bool isPresent(Address);
+ }
// ** OBJECTS **
- DirectoryMemory directory, constructor_hack="i";
+ DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])';
+
+ MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])';
- PersistentTable persistentTable, constructor_hack="i";
+ PersistentTable persistentTable;
+ TimerTable reissueTimerTable;
+
+ TBETable TBEs, template_hack="<Directory_TBE>";
+
+ bool starving, default="false";
State getState(Address addr) {
- return directory[addr].DirectoryState;
+ if (TBEs.isPresent(addr)) {
+ return TBEs[addr].TBEState;
+ } else {
+ return directory[addr].DirectoryState;
+ }
}
void setState(Address addr, State state) {
+ if (TBEs.isPresent(addr)) {
+ TBEs[addr].TBEState := state;
+ }
directory[addr].DirectoryState := state;
if (state == State:L) {
assert(directory[addr].Tokens == 0);
- }
+ }
// We have one or zero owners
assert((directory[addr].Owner.count() == 0) || (directory[addr].Owner.count() == 1));
@@ -112,19 +200,90 @@ machine(Directory, "Token protocol") {
// assert(directory[addr].Tokens >= (max_tokens() / 2)); // Only mostly true; this might not always hold
}
}
+
+ bool okToIssueStarving(Address addr, MachineID machinID) {
+ return persistentTable.okToIssueStarving(addr, machineID);
+ }
+
+ void markPersistentEntries(Address addr) {
+ persistentTable.markEntries(addr);
+ }
// ** OUT_PORTS **
out_port(responseNetwork_out, ResponseMsg, responseFromDir);
+ out_port(persistentNetwork_out, PersistentMsg, persistentFromDir);
out_port(requestNetwork_out, RequestMsg, requestFromDir);
+ out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaResponseFromDir);
+
+ //
+ // Memory buffer for memory controller to DIMM communication
+ //
+ out_port(memQueue_out, MemoryMsg, memBuffer);
// ** IN_PORTS **
+
+ // off-chip memory request/response is done
+ in_port(memQueue_in, MemoryMsg, memBuffer) {
+ if (memQueue_in.isReady()) {
+ peek(memQueue_in, MemoryMsg) {
+ if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
+ trigger(Event:Memory_Data, in_msg.Address);
+ } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
+ trigger(Event:Memory_Ack, in_msg.Address);
+ } else {
+ DEBUG_EXPR(in_msg.Type);
+ error("Invalid message");
+ }
+ }
+ }
+ }
+
+ // Reissue Timer
+ in_port(reissueTimerTable_in, Address, reissueTimerTable) {
+ if (reissueTimerTable_in.isReady()) {
+ trigger(Event:Request_Timeout, reissueTimerTable.readyAddress());
+ }
+ }
+
+ in_port(responseNetwork_in, ResponseMsg, responseToDir) {
+ if (responseNetwork_in.isReady()) {
+ peek(responseNetwork_in, ResponseMsg) {
+ assert(in_msg.Destination.isElement(machineID));
+ if (directory[in_msg.Address].Tokens + in_msg.Tokens == max_tokens()) {
+ if ((in_msg.Type == CoherenceResponseType:DATA_OWNER) ||
+ (in_msg.Type == CoherenceResponseType:DATA_SHARED)) {
+ trigger(Event:Data_All_Tokens, in_msg.Address);
+ } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) {
+ trigger(Event:Ack_Owner_All_Tokens, in_msg.Address);
+ } else if (in_msg.Type == CoherenceResponseType:ACK) {
+ trigger(Event:Ack_All_Tokens, in_msg.Address);
+ } else {
+ DEBUG_EXPR(in_msg.Type);
+ error("Invalid message");
+ }
+ } else {
+ if (in_msg.Type == CoherenceResponseType:DATA_OWNER) {
+ trigger(Event:Data_Owner, in_msg.Address);
+ } else if ((in_msg.Type == CoherenceResponseType:ACK) ||
+ (in_msg.Type == CoherenceResponseType:DATA_SHARED)) {
+ trigger(Event:Tokens, in_msg.Address);
+ } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) {
+ trigger(Event:Ack_Owner, in_msg.Address);
+ } else {
+ DEBUG_EXPR(in_msg.Type);
+ error("Invalid message");
+ }
+ }
+ }
+ }
+ }
in_port(persistentNetwork_in, PersistentMsg, persistentToDir) {
if (persistentNetwork_in.isReady()) {
peek(persistentNetwork_in, PersistentMsg) {
assert(in_msg.Destination.isElement(machineID));
- if (distributedPersistentEnabled()) {
+ if (distributed_persistent) {
// Apply the lockdown or unlockdown message to the table
if (in_msg.Type == PersistentRequestType:GETX_PERSISTENT) {
persistentTable.persistentRequestLock(in_msg.Address, in_msg.Requestor, AccessType:Write);
@@ -173,19 +332,18 @@ machine(Directory, "Token protocol") {
}
}
- in_port(responseNetwork_in, ResponseMsg, responseToDir) {
- if (responseNetwork_in.isReady()) {
- peek(responseNetwork_in, ResponseMsg) {
- assert(in_msg.Destination.isElement(machineID));
- if (in_msg.Type == CoherenceResponseType:DATA_OWNER) {
- trigger(Event:Data_Owner, in_msg.Address);
- } else if ((in_msg.Type == CoherenceResponseType:ACK) ||
- (in_msg.Type == CoherenceResponseType:DATA_SHARED)) {
- trigger(Event:Tokens, in_msg.Address);
- } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) {
- trigger(Event:Ack_Owner, in_msg.Address);
+ in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) {
+ if (dmaRequestQueue_in.isReady()) {
+ peek(dmaRequestQueue_in, DMARequestMsg) {
+ if (in_msg.Type == DMARequestType:READ) {
+ trigger(Event:DMA_READ, in_msg.LineAddress);
+ } else if (in_msg.Type == DMARequestType:WRITE) {
+ if (directory[in_msg.LineAddress].Tokens == max_tokens()) {
+ trigger(Event:DMA_WRITE_All_Tokens, in_msg.LineAddress);
+ } else {
+ trigger(Event:DMA_WRITE, in_msg.LineAddress);
+ }
} else {
- DEBUG_EXPR(in_msg.Type);
error("Invalid message");
}
}
@@ -199,7 +357,7 @@ machine(Directory, "Token protocol") {
if (directory[address].Tokens > 0) {
peek(requestNetwork_in, RequestMsg) {
// enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_CACHE_LATENCY") {// FIXME?
- enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_LATENCY") {// FIXME?
+ enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {// FIXME?
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -213,11 +371,151 @@ machine(Directory, "Token protocol") {
}
}
+ action(px_tryIssuingPersistentGETXRequest, "px", desc="...") {
+ if (okToIssueStarving(address, machineID) && (starving == false)) {
+ enqueue(persistentNetwork_out, PersistentMsg, latency = "1") {
+ out_msg.Address := address;
+ out_msg.Type := PersistentRequestType:GETX_PERSISTENT;
+ out_msg.Requestor := machineID;
+ out_msg.RequestorMachine := MachineType:Directory;
+ out_msg.Destination.broadcast(MachineType:L1Cache);
+
+ //
+ // Currently the configuration system limits the system to only one
+ // chip. Therefore, if we assume one shared L2 cache, then only one
+ // pertinent L2 cache exist.
+ //
+ //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Persistent_Control;
+ out_msg.Prefetch := PrefetchBit:No;
+ out_msg.AccessMode := AccessModeType:SupervisorMode;
+ }
+ markPersistentEntries(address);
+ starving := true;
+
+ TBEs[address].WentPersistent := true;
+
+ // Do not schedule a wakeup, a persistent requests will always complete
+ } else {
+
+ // We'd like to issue a persistent request, but are not allowed
+ // to issue a P.R. right now. This, we do not increment the
+ // IssueCount.
+
+ // Set a wakeup timer
+ reissueTimerTable.set(address, 10);
+ }
+ }
+
+ action(bw_broadcastWrite, "bw", desc="Broadcast GETX if we need tokens") {
+ peek(dmaRequestQueue_in, DMARequestMsg) {
+ //
+ // Assser that we only send message if we don't already have all the tokens
+ //
+ assert(directory[address].Tokens != max_tokens());
+ enqueue(requestNetwork_out, RequestMsg, latency = "1") {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceRequestType:GETX;
+ out_msg.Requestor := machineID;
+ out_msg.RequestorMachine := MachineType:Directory;
+
+ //
+ // Since only one chip, assuming all L1 caches are local
+ //
+ out_msg.Destination.broadcast(MachineType:L1Cache);
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
+ out_msg.RetryNum := 0;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Prefetch := PrefetchBit:No;
+ out_msg.AccessMode := AccessModeType:SupervisorMode;
+ }
+ }
+ }
+
+ action(ps_tryIssuingPersistentGETSRequest, "ps", desc="...") {
+ if (okToIssueStarving(address, machineID) && (starving == false)) {
+ enqueue(persistentNetwork_out, PersistentMsg, latency = "1") {
+ out_msg.Address := address;
+ out_msg.Type := PersistentRequestType:GETS_PERSISTENT;
+ out_msg.Requestor := machineID;
+ out_msg.RequestorMachine := MachineType:Directory;
+ out_msg.Destination.broadcast(MachineType:L1Cache);
+
+ //
+ // Currently the configuration system limits the system to only one
+ // chip. Therefore, if we assume one shared L2 cache, then only one
+ // pertinent L2 cache exist.
+ //
+ //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Persistent_Control;
+ out_msg.Prefetch := PrefetchBit:No;
+ out_msg.AccessMode := AccessModeType:SupervisorMode;
+ }
+ markPersistentEntries(address);
+ starving := true;
+
+ TBEs[address].WentPersistent := true;
+
+ // Do not schedule a wakeup, a persistent requests will always complete
+ } else {
+
+ // We'd like to issue a persistent request, but are not allowed
+ // to issue a P.R. right now. This, we do not increment the
+ // IssueCount.
+
+ // Set a wakeup timer
+ reissueTimerTable.set(address, 10);
+ }
+ }
+
+ action(br_broadcastRead, "br", desc="Broadcast GETS for data") {
+ peek(dmaRequestQueue_in, DMARequestMsg) {
+ enqueue(requestNetwork_out, RequestMsg, latency = "1") {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceRequestType:GETS;
+ out_msg.Requestor := machineID;
+ out_msg.RequestorMachine := MachineType:Directory;
+
+ //
+ // Since only one chip, assuming all L1 caches are local
+ //
+ out_msg.Destination.broadcast(MachineType:L1Cache);
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
+ out_msg.RetryNum := 0;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Prefetch := PrefetchBit:No;
+ out_msg.AccessMode := AccessModeType:SupervisorMode;
+ }
+ }
+ }
+
action(aa_sendTokensToStarver, "\a", desc="Send tokens to starver") {
// Only send a message if we have tokens to send
if (directory[address].Tokens > 0) {
// enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_CACHE_LATENCY") {// FIXME?
- enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_LATENCY") {// FIXME?
+ enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {// FIXME?
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -230,14 +528,14 @@ machine(Directory, "Token protocol") {
}
}
- action(d_sendDataWithAllTokens, "d", desc="Send data and tokens to requestor") {
- peek(requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") {
+ action(d_sendMemoryDataWithAllTokens, "d", desc="Send data and tokens to requestor") {
+ peek(memQueue_in, MemoryMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:Directory;
- out_msg.Destination.add(in_msg.Requestor);
+ out_msg.Destination.add(in_msg.OriginalRequestorMachId);
assert(directory[address].Tokens > 0);
out_msg.Tokens := directory[in_msg.Address].Tokens;
out_msg.DataBlk := directory[in_msg.Address].DataBlk;
@@ -249,21 +547,140 @@ machine(Directory, "Token protocol") {
}
action(dd_sendDataWithAllTokensToStarver, "\d", desc="Send data and tokens to starver") {
- enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") {
- out_msg.Address := address;
- out_msg.Type := CoherenceResponseType:DATA_OWNER;
- out_msg.Sender := machineID;
- out_msg.SenderMachine := MachineType:Directory;
- out_msg.Destination.add(persistentTable.findSmallest(address));
- assert(directory[address].Tokens > 0);
- out_msg.Tokens := directory[address].Tokens;
- out_msg.DataBlk := directory[address].DataBlk;
- out_msg.Dirty := false;
- out_msg.MessageSize := MessageSizeType:Response_Data;
+ peek(memQueue_in, MemoryMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceResponseType:DATA_OWNER;
+ out_msg.Sender := machineID;
+ out_msg.SenderMachine := MachineType:Directory;
+ out_msg.Destination.add(persistentTable.findSmallest(address));
+ assert(directory[address].Tokens > 0);
+ out_msg.Tokens := directory[address].Tokens;
+ out_msg.DataBlk := directory[address].DataBlk;
+ out_msg.Dirty := false;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
}
directory[address].Tokens := 0;
}
+ action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
+ peek(requestNetwork_in, RequestMsg) {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
+ out_msg.Address := address;
+ out_msg.Type := MemoryRequestType:MEMORY_READ;
+ out_msg.Sender := machineID;
+ out_msg.OriginalRequestorMachId := in_msg.Requestor;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.DataBlk := directory[address].DataBlk;
+ DEBUG_EXPR(out_msg);
+ }
+ }
+ }
+
+ action(fd_memoryDma, "fd", desc="Queue off-chip fetch request") {
+ peek(dmaRequestQueue_in, DMARequestMsg) {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
+ out_msg.Address := address;
+ out_msg.Type := MemoryRequestType:MEMORY_READ;
+ out_msg.Sender := machineID;
+ out_msg.OriginalRequestorMachId := in_msg.Requestor;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.DataBlk := directory[address].DataBlk;
+ DEBUG_EXPR(out_msg);
+ }
+ }
+ }
+
+ action(lq_queueMemoryWbRequest, "lq", desc="Write data to memory") {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
+ out_msg.Address := address;
+ out_msg.Type := MemoryRequestType:MEMORY_WB;
+ DEBUG_EXPR(out_msg);
+ }
+ }
+
+ action(ld_queueMemoryDmaWriteFromTbe, "ld", desc="Write DMA data to memory") {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
+ out_msg.Address := address;
+ out_msg.Type := MemoryRequestType:MEMORY_WB;
+ // first, initialize the data blk to the current version of system memory
+ out_msg.DataBlk := TBEs[address].DataBlk;
+ // then add the dma write data
+ out_msg.DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
+ DEBUG_EXPR(out_msg);
+ }
+ }
+
+ action(lr_queueMemoryDmaReadWriteback, "lr", desc="Write DMA data from read to memory") {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
+ out_msg.Address := address;
+ out_msg.Type := MemoryRequestType:MEMORY_WB;
+ // first, initialize the data blk to the current version of system memory
+ out_msg.DataBlk := TBEs[address].DataBlk;
+ DEBUG_EXPR(out_msg);
+ }
+ }
+
+ action(vd_allocateDmaRequestInTBE, "vd", desc="Record Data in TBE") {
+ peek(dmaRequestQueue_in, DMARequestMsg) {
+ TBEs.allocate(address);
+ TBEs[address].DmaDataBlk := in_msg.DataBlk;
+ TBEs[address].PhysicalAddress := in_msg.PhysicalAddress;
+ TBEs[address].Len := in_msg.Len;
+ TBEs[address].DmaRequestor := in_msg.Requestor;
+ TBEs[address].WentPersistent := false;
+ }
+ }
+
+ action(s_deallocateTBE, "s", desc="Deallocate TBE") {
+
+ if (TBEs[address].WentPersistent) {
+ assert(starving == true);
+
+ enqueue(persistentNetwork_out, PersistentMsg, latency = "1") {
+ out_msg.Address := address;
+ out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT;
+ out_msg.Requestor := machineID;
+ out_msg.RequestorMachine := MachineType:Directory;
+ out_msg.Destination.broadcast(MachineType:L1Cache);
+
+ //
+ // Currently the configuration system limits the system to only one
+ // chip. Therefore, if we assume one shared L2 cache, then only one
+ // pertinent L2 cache exist.
+ //
+ //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address));
+
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:L2Cache,
+ l2_select_low_bit,
+ l2_select_num_bits));
+
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Persistent_Control;
+ }
+ starving := false;
+ }
+
+ TBEs.deallocate(address);
+ }
+
+ action(rd_recordDataInTbe, "rd", desc="Record data in TBE") {
+ peek(responseNetwork_in, ResponseMsg) {
+ TBEs[address].DataBlk := in_msg.DataBlk;
+ }
+ }
+
+ action(cd_writeCleanDataToTbe, "cd", desc="Write clean memory data to TBE") {
+ TBEs[address].DataBlk := directory[address].DataBlk;
+ }
+
+ action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") {
+ directory[address].DataBlk := TBEs[address].DataBlk;
+ directory[address].DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
+ }
+
action(f_incrementTokens, "f", desc="Increment the number of tokens we're tracking") {
peek(responseNetwork_in, ResponseMsg) {
assert(in_msg.Tokens >= 1);
@@ -275,14 +692,34 @@ machine(Directory, "Token protocol") {
requestNetwork_in.dequeue();
}
+ action(z_recycleRequest, "z", desc="Recycle the request queue") {
+ requestNetwork_in.recycle();
+ }
+
action(k_popIncomingResponseQueue, "k", desc="Pop incoming response queue") {
responseNetwork_in.dequeue();
}
+ action(kz_recycleResponse, "kz", desc="Recycle incoming response queue") {
+ responseNetwork_in.recycle();
+ }
+
action(l_popIncomingPersistentQueue, "l", desc="Pop incoming persistent queue") {
persistentNetwork_in.dequeue();
}
+ action(p_popDmaRequestQueue, "pd", desc="pop dma request queue") {
+ dmaRequestQueue_in.dequeue();
+ }
+
+ action(y_recycleDmaRequestQueue, "y", desc="recycle dma request queue") {
+ dmaRequestQueue_in.recycle();
+ }
+
+ action(l_popMemQueue, "q", desc="Pop off-chip request queue") {
+ memQueue_in.dequeue();
+ }
+
action(m_writeDataToMemory, "m", desc="Write dirty writeback to memory") {
peek(responseNetwork_in, ResponseMsg) {
directory[in_msg.Address].DataBlk := in_msg.DataBlk;
@@ -291,18 +728,15 @@ machine(Directory, "Token protocol") {
}
}
- action(n_checkIncomingMsg, "n", desc="Check incoming token message") {
+ action(n_checkData, "n", desc="Check incoming clean data message") {
peek(responseNetwork_in, ResponseMsg) {
- assert(in_msg.Type == CoherenceResponseType:ACK_OWNER);
- assert(in_msg.Dirty == false);
- assert(in_msg.MessageSize == MessageSizeType:Writeback_Control);
assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk);
}
}
action(r_bounceResponse, "r", desc="Bounce response to starving processor") {
peek(responseNetwork_in, ResponseMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := in_msg.Type;
out_msg.Sender := machineID;
@@ -316,7 +750,20 @@ machine(Directory, "Token protocol") {
}
}
- action(s_bounceDatalessOwnerToken, "s", desc="Bounce clean owner token to starving processor") {
+ action(st_scheduleTimeout, "st", desc="Schedule Timeout") {
+ //
+ // currently only support a fixed timeout latency
+ //
+ reissueTimerTable.set(address, fixed_timeout_latency);
+ }
+
+ action(ut_unsetReissueTimer, "ut", desc="Unset reissue timer.") {
+ if (reissueTimerTable.isSet(address)) {
+ reissueTimerTable.unset(address);
+ }
+ }
+
+ action(bd_bounceDatalessOwnerToken, "bd", desc="Bounce clean owner token to starving processor") {
peek(responseNetwork_in, ResponseMsg) {
assert(in_msg.Type == CoherenceResponseType:ACK_OWNER);
assert(in_msg.Dirty == false);
@@ -331,7 +778,7 @@ machine(Directory, "Token protocol") {
// Bounce the message, but "re-associate" the data and the owner
// token. In essence we're converting an ACK_OWNER message to a
// DATA_OWNER message, keeping the number of tokens the same.
- enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_OWNER;
out_msg.Sender := machineID;
@@ -346,53 +793,212 @@ machine(Directory, "Token protocol") {
}
}
+ action(da_sendDmaAck, "da", desc="Send Ack to DMA controller") {
+ enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") {
+ out_msg.PhysicalAddress := address;
+ out_msg.LineAddress := address;
+ out_msg.Type := DMAResponseType:ACK;
+ out_msg.Destination.add(TBEs[address].DmaRequestor);
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+
+ action(dm_sendMemoryDataToDma, "dm", desc="Send Data to DMA controller from memory") {
+ peek(memQueue_in, MemoryMsg) {
+ enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") {
+ out_msg.PhysicalAddress := address;
+ out_msg.LineAddress := address;
+ out_msg.Type := DMAResponseType:DATA;
+ //
+ // we send the entire data block and rely on the dma controller to
+ // split it up if need be
+ //
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Destination.add(TBEs[address].DmaRequestor);
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+ }
+
+ action(dd_sendDmaData, "dd", desc="Send Data to DMA controller") {
+ peek(responseNetwork_in, ResponseMsg) {
+ enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") {
+ out_msg.PhysicalAddress := address;
+ out_msg.LineAddress := address;
+ out_msg.Type := DMAResponseType:DATA;
+ //
+ // we send the entire data block and rely on the dma controller to
+ // split it up if need be
+ //
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Destination.add(TBEs[address].DmaRequestor);
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+ }
// TRANSITIONS
- // Trans. from O
- transition(O, GETX, NO) {
- d_sendDataWithAllTokens;
+ //
+ // Trans. from base state O
+ // the directory has valid data
+ //
+ transition(O, GETX, NO_W) {
+ qf_queueMemoryFetchRequest;
j_popIncomingRequestQueue;
}
- transition(O, GETS, NO) {
- d_sendDataWithAllTokens;
+ transition(O, DMA_WRITE, O_DW) {
+ vd_allocateDmaRequestInTBE;
+ bw_broadcastWrite;
+ st_scheduleTimeout;
+ p_popDmaRequestQueue;
+ }
+
+ transition(O, DMA_WRITE_All_Tokens, O_DW_W) {
+ vd_allocateDmaRequestInTBE;
+ cd_writeCleanDataToTbe;
+ dwt_writeDmaDataFromTBE;
+ ld_queueMemoryDmaWriteFromTbe;
+ p_popDmaRequestQueue;
+ }
+
+ transition(O, GETS, NO_W) {
+ qf_queueMemoryFetchRequest;
j_popIncomingRequestQueue;
}
- transition(O, Lockdown, L) {
- dd_sendDataWithAllTokensToStarver;
+ transition(O, DMA_READ, O_DR_W) {
+ vd_allocateDmaRequestInTBE;
+ fd_memoryDma;
+ st_scheduleTimeout;
+ p_popDmaRequestQueue;
+ }
+
+ transition(O, Lockdown, L_W) {
+ qf_queueMemoryFetchRequest;
+ l_popIncomingPersistentQueue;
+ }
+
+ transition(O, {Tokens, Ack_All_Tokens}) {
+ f_incrementTokens;
+ k_popIncomingResponseQueue;
+ }
+
+ transition(O, {Data_Owner, Data_All_Tokens}) {
+ n_checkData;
+ f_incrementTokens;
+ k_popIncomingResponseQueue;
+ }
+
+ //
+ // transitioning to Owner, waiting for memory before DMA ack
+ // All other events should recycle/stall
+ //
+ transition(O_DR_W, Memory_Data, O) {
+ dm_sendMemoryDataToDma;
+ ut_unsetReissueTimer;
+ s_deallocateTBE;
+ l_popMemQueue;
+ }
+
+ //
+ // issued GETX for DMA write, waiting for all tokens
+ //
+ transition(O_DW, Tokens) {
+ f_incrementTokens;
+ k_popIncomingResponseQueue;
+ }
+
+ transition(O_DW, Data_Owner) {
+ f_incrementTokens;
+ rd_recordDataInTbe;
+ k_popIncomingResponseQueue;
+ }
+
+ transition(O_DW, Ack_Owner) {
+ f_incrementTokens;
+ cd_writeCleanDataToTbe;
+ k_popIncomingResponseQueue;
+ }
+
+ transition(O_DW, Lockdown, DW_L) {
l_popIncomingPersistentQueue;
}
- transition(O, Tokens) {
+ transition({NO_DW, O_DW}, Data_All_Tokens, O_DW_W) {
f_incrementTokens;
+ rd_recordDataInTbe;
+ dwt_writeDmaDataFromTBE;
+ ld_queueMemoryDmaWriteFromTbe;
+ ut_unsetReissueTimer;
k_popIncomingResponseQueue;
}
+ transition(O_DW, Ack_All_Tokens, O_DW_W) {
+ f_incrementTokens;
+ dwt_writeDmaDataFromTBE;
+ ld_queueMemoryDmaWriteFromTbe;
+ ut_unsetReissueTimer;
+ k_popIncomingResponseQueue;
+ }
+
+ transition(O_DW, Ack_Owner_All_Tokens, O_DW_W) {
+ f_incrementTokens;
+ cd_writeCleanDataToTbe;
+ dwt_writeDmaDataFromTBE;
+ ld_queueMemoryDmaWriteFromTbe;
+ ut_unsetReissueTimer;
+ k_popIncomingResponseQueue;
+ }
+
+ transition(O_DW_W, Memory_Ack, O) {
+ da_sendDmaAck;
+ s_deallocateTBE;
+ l_popMemQueue;
+ }
+
+ //
// Trans. from NO
+ // The direcotry does not have valid data, but may have some tokens
+ //
transition(NO, GETX) {
a_sendTokens;
j_popIncomingRequestQueue;
}
+ transition(NO, DMA_WRITE, NO_DW) {
+ vd_allocateDmaRequestInTBE;
+ bw_broadcastWrite;
+ st_scheduleTimeout;
+ p_popDmaRequestQueue;
+ }
+
transition(NO, GETS) {
j_popIncomingRequestQueue;
}
+ transition(NO, DMA_READ, NO_DR) {
+ vd_allocateDmaRequestInTBE;
+ br_broadcastRead;
+ st_scheduleTimeout;
+ p_popDmaRequestQueue;
+ }
+
transition(NO, Lockdown, L) {
aa_sendTokensToStarver;
l_popIncomingPersistentQueue;
}
- transition(NO, Data_Owner, O) {
+ transition(NO, {Data_Owner, Data_All_Tokens}, O_W) {
m_writeDataToMemory;
f_incrementTokens;
+ lq_queueMemoryWbRequest;
k_popIncomingResponseQueue;
}
- transition(NO, Ack_Owner, O) {
- n_checkIncomingMsg;
+ transition(NO, {Ack_Owner, Ack_Owner_All_Tokens}, O) {
+ n_checkData;
f_incrementTokens;
k_popIncomingResponseQueue;
}
@@ -402,34 +1008,156 @@ machine(Directory, "Token protocol") {
k_popIncomingResponseQueue;
}
+ transition(NO_W, Memory_Data, NO) {
+ d_sendMemoryDataWithAllTokens;
+ l_popMemQueue;
+ }
+
+ // Trans. from NO_DW
+ transition(NO_DW, Request_Timeout) {
+ ut_unsetReissueTimer;
+ px_tryIssuingPersistentGETXRequest;
+ }
+
+ transition(NO_DW, Lockdown, DW_L) {
+ aa_sendTokensToStarver;
+ l_popIncomingPersistentQueue;
+ }
+
+ // Note: NO_DW, Data_All_Tokens transition is combined with O_DW
+ // Note: NO_DW should not receive the action Ack_All_Tokens because the
+ // directory does not have valid data
+
+ transition(NO_DW, Data_Owner, O_DW) {
+ f_incrementTokens;
+ rd_recordDataInTbe;
+ lq_queueMemoryWbRequest;
+ k_popIncomingResponseQueue;
+ }
+
+ transition({NO_DW, NO_DR}, Tokens) {
+ f_incrementTokens;
+ k_popIncomingResponseQueue;
+ }
+
+ // Trans. from NO_DR
+ transition(NO_DR, Request_Timeout) {
+ ut_unsetReissueTimer;
+ ps_tryIssuingPersistentGETSRequest;
+ }
+
+ transition(NO_DR, Lockdown, DR_L) {
+ aa_sendTokensToStarver;
+ l_popIncomingPersistentQueue;
+ }
+
+ transition(NO_DR, {Data_Owner, Data_All_Tokens}, O_W) {
+ m_writeDataToMemory;
+ f_incrementTokens;
+ dd_sendDmaData;
+ lr_queueMemoryDmaReadWriteback;
+ ut_unsetReissueTimer;
+ s_deallocateTBE;
+ k_popIncomingResponseQueue;
+ }
+
// Trans. from L
- transition(L, {GETX, GETS}) {
+ transition({L, DW_L, DR_L}, {GETX, GETS}) {
j_popIncomingRequestQueue;
}
- transition(L, Lockdown) {
+ transition({L, DW_L, DR_L, L_W, DR_L_W}, Lockdown) {
l_popIncomingPersistentQueue;
}
- // we could change this to write the data to memory and send it cleanly
- transition(L, Data_Owner) {
+ //
+ // Received data for lockdown blocks
+ // For blocks with outstanding dma requests to them
+ // ...we could change this to write the data to memory and send it cleanly
+ // ...we could also proactively complete our DMA requests
+ // However, to keep my mind from spinning out-of-control, we won't for now :)
+ //
+ transition({DW_L, DR_L, L}, {Data_Owner, Data_All_Tokens}) {
r_bounceResponse;
k_popIncomingResponseQueue;
}
- transition(L, Tokens) {
+ transition({DW_L, DR_L, L}, Tokens) {
r_bounceResponse;
k_popIncomingResponseQueue;
}
- transition(L, Ack_Owner) {
- s_bounceDatalessOwnerToken;
+ transition({DW_L, DR_L, L}, {Ack_Owner_All_Tokens, Ack_Owner}) {
+ bd_bounceDatalessOwnerToken;
k_popIncomingResponseQueue;
}
-
transition(L, Unlockdown, NO) {
l_popIncomingPersistentQueue;
}
+ transition(L_W, Memory_Data, L) {
+ dd_sendDataWithAllTokensToStarver;
+ l_popMemQueue;
+ }
+
+ transition(DR_L_W, Memory_Data, DR_L) {
+ dd_sendDataWithAllTokensToStarver;
+ l_popMemQueue;
+ }
+
+ transition(DW_L, {Unlockdown, Own_Lock_or_Unlock}, NO_DW) {
+ l_popIncomingPersistentQueue;
+ }
+
+ transition(DR_L_W, {Unlockdown, Own_Lock_or_Unlock}, O_DR_W) {
+ l_popIncomingPersistentQueue;
+ }
+
+ transition({DW_L, DR_L_W}, Request_Timeout) {
+ ut_unsetReissueTimer;
+ px_tryIssuingPersistentGETXRequest;
+ }
+
+ transition(DR_L, {Unlockdown, Own_Lock_or_Unlock}, NO_DR) {
+ l_popIncomingPersistentQueue;
+ }
+
+ transition(DR_L, Request_Timeout) {
+ ut_unsetReissueTimer;
+ ps_tryIssuingPersistentGETSRequest;
+ }
+
+ transition(O_W, Memory_Ack, O) {
+ l_popMemQueue;
+ }
+
+ transition({O, NO, L, O_DW, NO_DW, NO_DR}, Own_Lock_or_Unlock) {
+ l_popIncomingPersistentQueue;
+ }
+
+ // Blocked states
+ transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W, O_DW, NO_DW, NO_DR}, {GETX, GETS}) {
+ z_recycleRequest;
+ }
+
+ transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W, O_DW, NO_DW, NO_DR, L, DW_L, DR_L}, {DMA_READ, DMA_WRITE}) {
+ y_recycleDmaRequestQueue;
+ }
+
+ transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W}, {Data_Owner, Ack_Owner, Tokens}) {
+ kz_recycleResponse;
+ }
+
+ transition({NO_W, O_W}, Lockdown, L_W) {
+ l_popIncomingPersistentQueue;
+ }
+
+ transition(O_DR_W, Lockdown, DR_L_W) {
+ l_popIncomingPersistentQueue;
+ }
+
+ transition({NO_W, O_W, O_DR_W}, {Unlockdown, Own_Lock_or_Unlock}) {
+ l_popIncomingPersistentQueue;
+ }
}
diff --git a/src/mem/protocol/MOESI_CMP_token-dma.sm b/src/mem/protocol/MOESI_CMP_token-dma.sm
new file mode 100644
index 000000000..550a36ae0
--- /dev/null
+++ b/src/mem/protocol/MOESI_CMP_token-dma.sm
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+machine(DMA, "DMA Controller")
+: int request_latency
+{
+
+ MessageBuffer responseFromDir, network="From", virtual_network="0", ordered="true", no_vector="true";
+ MessageBuffer reqToDirectory, network="To", virtual_network="5", ordered="false", no_vector="true";
+
+ enumeration(State, desc="DMA states", default="DMA_State_READY") {
+ READY, desc="Ready to accept a new request";
+ BUSY_RD, desc="Busy: currently processing a request";
+ BUSY_WR, desc="Busy: currently processing a request";
+ }
+
+ enumeration(Event, desc="DMA events") {
+ ReadRequest, desc="A new read request";
+ WriteRequest, desc="A new write request";
+ Data, desc="Data from a DMA memory read";
+ Ack, desc="DMA write to memory completed";
+ }
+
+ external_type(DMASequencer) {
+ void ackCallback();
+ void dataCallback(DataBlock);
+ }
+
+ MessageBuffer mandatoryQueue, ordered="false", no_vector="true";
+ DMASequencer dma_sequencer, factory='RubySystem::getDMASequencer(m_cfg["dma_sequencer"])', no_vector="true";
+ State cur_state, no_vector="true";
+
+ State getState(Address addr) {
+ return cur_state;
+ }
+ void setState(Address addr, State state) {
+ cur_state := state;
+ }
+
+ out_port(reqToDirectory_out, DMARequestMsg, reqToDirectory, desc="...");
+
+ in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") {
+ if (dmaRequestQueue_in.isReady()) {
+ peek(dmaRequestQueue_in, SequencerMsg) {
+ if (in_msg.Type == SequencerRequestType:LD ) {
+ trigger(Event:ReadRequest, in_msg.LineAddress);
+ } else if (in_msg.Type == SequencerRequestType:ST) {
+ trigger(Event:WriteRequest, in_msg.LineAddress);
+ } else {
+ error("Invalid request type");
+ }
+ }
+ }
+ }
+
+ in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") {
+ if (dmaResponseQueue_in.isReady()) {
+ peek( dmaResponseQueue_in, DMAResponseMsg) {
+ if (in_msg.Type == DMAResponseType:ACK) {
+ trigger(Event:Ack, in_msg.LineAddress);
+ } else if (in_msg.Type == DMAResponseType:DATA) {
+ trigger(Event:Data, in_msg.LineAddress);
+ } else {
+ error("Invalid response type");
+ }
+ }
+ }
+ }
+
+ action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") {
+ peek(dmaRequestQueue_in, SequencerMsg) {
+ enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) {
+ out_msg.PhysicalAddress := in_msg.PhysicalAddress;
+ out_msg.LineAddress := in_msg.LineAddress;
+ out_msg.Type := DMARequestType:READ;
+ out_msg.Requestor := machineID;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Len := in_msg.Len;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") {
+ peek(dmaRequestQueue_in, SequencerMsg) {
+ enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) {
+ out_msg.PhysicalAddress := in_msg.PhysicalAddress;
+ out_msg.LineAddress := in_msg.LineAddress;
+ out_msg.Type := DMARequestType:WRITE;
+ out_msg.Requestor := machineID;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Len := in_msg.Len;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(a_ackCallback, "a", desc="Notify dma controller that write request completed") {
+ peek (dmaResponseQueue_in, DMAResponseMsg) {
+ dma_sequencer.ackCallback();
+ }
+ }
+
+ action(d_dataCallback, "d", desc="Write data to dma sequencer") {
+ peek (dmaResponseQueue_in, DMAResponseMsg) {
+ dma_sequencer.dataCallback(in_msg.DataBlk);
+ }
+ }
+
+ action(p_popRequestQueue, "p", desc="Pop request queue") {
+ dmaRequestQueue_in.dequeue();
+ }
+
+ action(p_popResponseQueue, "\p", desc="Pop request queue") {
+ dmaResponseQueue_in.dequeue();
+ }
+
+ transition(READY, ReadRequest, BUSY_RD) {
+ s_sendReadRequest;
+ p_popRequestQueue;
+ }
+
+ transition(READY, WriteRequest, BUSY_WR) {
+ s_sendWriteRequest;
+ p_popRequestQueue;
+ }
+
+ transition(BUSY_RD, Data, READY) {
+ d_dataCallback;
+ p_popResponseQueue;
+ }
+
+ transition(BUSY_WR, Ack, READY) {
+ a_ackCallback;
+ p_popResponseQueue;
+ }
+}
diff --git a/src/mem/protocol/MOESI_CMP_token-msg.sm b/src/mem/protocol/MOESI_CMP_token-msg.sm
index 2a75ce644..40c16b5e1 100644
--- a/src/mem/protocol/MOESI_CMP_token-msg.sm
+++ b/src/mem/protocol/MOESI_CMP_token-msg.sm
@@ -59,8 +59,10 @@ enumeration(CoherenceResponseType, desc="...") {
// TriggerType
enumeration(TriggerType, desc="...") {
- REQUEST_TIMEOUT, desc="See corresponding event";
+ REQUEST_TIMEOUT, desc="See corresponding event";
USE_TIMEOUT, desc="See corresponding event";
+ DATA, desc="data for dma read response";
+ DATA_ALL_TOKENS, desc="data and all tokens for dma write response";
}
// TriggerMsg
@@ -111,13 +113,45 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") {
MessageSizeType MessageSize, desc="size category of the message";
}
-GenericRequestType convertToGenericType(CoherenceRequestType type) {
- if(type == CoherenceRequestType:GETS) {
- return GenericRequestType:GETS;
- } else if(type == CoherenceRequestType:GETX) {
- return GenericRequestType:GETX;
- } else {
- DEBUG_EXPR(type);
- error("invalid CoherenceRequestType");
- }
+enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") {
+ READ, desc="Memory Read";
+ WRITE, desc="Memory Write";
+ NULL, desc="Invalid";
}
+
+enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") {
+ DATA, desc="DATA read";
+ ACK, desc="ACK write";
+ NULL, desc="Invalid";
+}
+
+structure(DMARequestMsg, desc="...", interface="NetworkMessage") {
+ DMARequestType Type, desc="Request type (read/write)";
+ Address PhysicalAddress, desc="Physical address for this request";
+ Address LineAddress, desc="Line address for this request";
+ MachineID Requestor, desc="Node who initiated the request";
+ NetDest Destination, desc="Destination";
+ DataBlock DataBlk, desc="DataBlk attached to this request";
+ int Len, desc="The length of the request";
+ MessageSizeType MessageSize, desc="size category of the message";
+}
+
+structure(DMAResponseMsg, desc="...", interface="NetworkMessage") {
+ DMAResponseType Type, desc="Response type (DATA/ACK)";
+ Address PhysicalAddress, desc="Physical address for this request";
+ Address LineAddress, desc="Line address for this request";
+ NetDest Destination, desc="Destination";
+ DataBlock DataBlk, desc="DataBlk attached to this request";
+ MessageSizeType MessageSize, desc="size category of the message";
+}
+
+//GenericRequestType convertToGenericType(CoherenceRequestType type) {
+// if(type == CoherenceRequestType:GETS) {
+// return GenericRequestType:GETS;
+// } else if(type == CoherenceRequestType:GETX) {
+// return GenericRequestType:GETX;
+// } else {
+// DEBUG_EXPR(type);
+// error("invalid CoherenceRequestType");
+// }
+//}
diff --git a/src/mem/protocol/MOESI_CMP_token.slicc b/src/mem/protocol/MOESI_CMP_token.slicc
index ae4a6d6ec..a41226f90 100644
--- a/src/mem/protocol/MOESI_CMP_token.slicc
+++ b/src/mem/protocol/MOESI_CMP_token.slicc
@@ -2,4 +2,5 @@ MOESI_CMP_token-msg.sm
MOESI_CMP_token-L1cache.sm
MOESI_CMP_token-L2cache.sm
MOESI_CMP_token-dir.sm
+MOESI_CMP_token-dma.sm
standard_CMP-protocol.sm
diff --git a/src/mem/protocol/RubySlicc_Util.sm b/src/mem/protocol/RubySlicc_Util.sm
index 312682bd7..e1771448f 100644
--- a/src/mem/protocol/RubySlicc_Util.sm
+++ b/src/mem/protocol/RubySlicc_Util.sm
@@ -52,7 +52,6 @@ void dirProfileCoherenceRequest(NodeID node, bool needCLB);
bool isPerfectProtocol();
bool L1trainsPrefetcher();
int max_tokens();
-int N_tokens();
bool distributedPersistentEnabled();
Address setOffset(Address addr, int offset);
Address makeLineAddress(Address addr);
diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript
index 0c8423c85..3559f042f 100644
--- a/src/mem/ruby/SConscript
+++ b/src/mem/ruby/SConscript
@@ -114,6 +114,7 @@ MakeInclude('system/MachineID.hh')
MakeInclude('system/MemoryControl.hh')
MakeInclude('system/NodeID.hh')
MakeInclude('system/PerfectCacheMemory.hh')
+MakeInclude('system/PersistentTable.hh')
MakeInclude('system/Sequencer.hh')
MakeInclude('system/TBETable.hh')
MakeInclude('system/TimerTable.hh')
diff --git a/src/mem/ruby/common/NetDest.cc b/src/mem/ruby/common/NetDest.cc
index 32771235f..35bb4ec43 100644
--- a/src/mem/ruby/common/NetDest.cc
+++ b/src/mem/ruby/common/NetDest.cc
@@ -133,13 +133,14 @@ NodeID NetDest::elementAt(MachineID index) {
return m_bits[vecIndex(index)].elementAt(bitIndex(index.num));
}
-NodeID NetDest::smallestElement() const
+MachineID NetDest::smallestElement() const
{
assert(count() > 0);
for (int i=0; i<m_bits.size(); i++) {
for (int j=0; j<m_bits[i].getSize(); j++) {
if (m_bits[i].isElement(j)) {
- return j;
+ MachineID mach = {MachineType_from_base_level(i), j};
+ return mach;
}
}
}
diff --git a/src/mem/ruby/common/NetDest.hh b/src/mem/ruby/common/NetDest.hh
index 7301409ce..071106623 100644
--- a/src/mem/ruby/common/NetDest.hh
+++ b/src/mem/ruby/common/NetDest.hh
@@ -96,7 +96,7 @@ public:
//For Princeton Network
Vector<NodeID> getAllDest();
- NodeID smallestElement() const;
+ MachineID smallestElement() const;
MachineID smallestElement(MachineType machine) const;
void setSize();
diff --git a/src/mem/ruby/config/MOESI_CMP_token.rb b/src/mem/ruby/config/MOESI_CMP_token.rb
new file mode 100644
index 000000000..ba963dc06
--- /dev/null
+++ b/src/mem/ruby/config/MOESI_CMP_token.rb
@@ -0,0 +1,92 @@
+
+require "cfg.rb"
+require "util.rb"
+
+
+class MOESI_CMP_token_L1CacheController < L1CacheController
+ attr :icache, :dcache
+ attr :num_l2_controllers
+ attr :n_tokens
+ def initialize(obj_name, mach_type, icache, dcache, sequencer, num_l2_controllers, n_tokens)
+ super(obj_name, mach_type, [icache, dcache], sequencer)
+ @icache = icache
+ @dcache = dcache
+ @num_l2_controllers = num_l2_controllers
+ @n_tokens = n_tokens
+ end
+ def argv()
+ num_select_bits = log_int(num_l2_controllers)
+ num_block_bits = log_int(RubySystem.block_size_bytes)
+
+ l2_select_low_bit = num_block_bits
+
+ vec = super()
+ vec += " icache " + @icache.obj_name
+ vec += " dcache " + @dcache.obj_name
+ vec += " l1_request_latency " + l1_request_latency.to_s
+ vec += " l1_response_latency " + l1_response_latency.to_s
+ vec += " l2_select_low_bit " + l2_select_low_bit.to_s
+ vec += " l2_select_num_bits " + num_select_bits.to_s
+ vec += " N_tokens " + n_tokens.to_s
+ vec += " retry_threshold " + retry_threshold.to_s
+ vec += " fixed_timeout_latency " + fixed_timeout_latency.to_s
+ vec += " dynamic_timeout_enabled " + dynamic_timeout_enabled.to_s
+
+ return vec
+ end
+end
+
+class MOESI_CMP_token_L2CacheController < CacheController
+ attr :cache
+ attr :n_tokens
+ def initialize(obj_name, mach_type, cache, n_tokens)
+ super(obj_name, mach_type, [cache])
+ @cache = cache
+ @n_tokens = n_tokens
+ end
+ def argv()
+ vec = super()
+ vec += " cache " + @cache.obj_name
+ vec += " l2_request_latency " + l2_request_latency.to_s
+ vec += " l2_response_latency " + l2_response_latency.to_s
+ vec += " N_tokens " + n_tokens.to_s
+ vec += " filtering_enabled " + filtering_enabled.to_s
+ return vec
+ end
+end
+
+
+class MOESI_CMP_token_DirectoryController < DirectoryController
+ attr :num_l2_controllers
+ def initialize(obj_name, mach_type, directory, memory_control, num_l2_controllers)
+ super(obj_name, mach_type, directory, memory_control)
+ @num_l2_controllers = num_l2_controllers
+ end
+ def argv()
+ num_select_bits = log_int(num_l2_controllers)
+ num_block_bits = log_int(RubySystem.block_size_bytes)
+
+ l2_select_low_bit = num_block_bits
+
+ vec = super()
+ vec += " directory_latency "+directory_latency.to_s
+ vec += " l2_select_low_bit " + l2_select_low_bit.to_s
+ vec += " l2_select_num_bits " + num_select_bits.to_s
+ vec += " distributed_persistent "+distributed_persistent.to_s
+ vec += " fixed_timeout_latency " + fixed_timeout_latency.to_s
+ return vec
+ end
+
+end
+
+class MOESI_CMP_token_DMAController < DMAController
+ def initialize(obj_name, mach_type, dma_sequencer)
+ super(obj_name, mach_type, dma_sequencer)
+ end
+ def argv()
+ vec = super
+ vec += " request_latency "+request_latency.to_s
+ vec += " response_latency "+response_latency.to_s
+ return vec
+ end
+end
diff --git a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
index 83020742e..566055f74 100644
--- a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
+++ b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
@@ -12,13 +12,13 @@ RubySystem.reset
# default values
num_cores = 2
-l1_icache_size_kb = 32
+l1_icache_size_bytes = 32768
l1_icache_assoc = 8
l1_icache_latency = 1
-l1_dcache_size_kb = 32
+l1_dcache_size_bytes = 32768
l1_dcache_assoc = 8
l1_dcache_latency = 1
-l2_cache_size_kb = 2048 # total size (sum of all banks)
+l2_cache_size_bytes = 2048 # total size (sum of all banks)
l2_cache_assoc = 16
l2_cache_latency = 12
num_l2_banks = num_cores
@@ -26,7 +26,7 @@ num_memories = 1
memory_size_mb = 1024
num_dma = 1
-protocol = "MESI_CMP_directory"
+protocol = "MOESI_CMP_token"
# check for overrides
@@ -43,9 +43,20 @@ for i in 0..$*.size-1 do
elsif $*[i] == "-s"
memory_size_mb = $*[i+1].to_i
i = i + 1
+ elsif $*[i] == "-C"
+ l1_dcache_size_bytes = $*[i+1].to_i
+ i = i + 1
+ elsif $*[i] == "-A"
+ l1_dcache_assoc = $*[i+1].to_i
+ i = i + 1
+ elsif $*[i] == "-D"
+ num_dma = $*[i+1].to_i
+ i = i + 1
end
end
+n_tokens = num_cores + 1
+
net_ports = Array.new
iface_ports = Array.new
@@ -54,10 +65,19 @@ iface_ports = Array.new
require protocol+".rb"
num_cores.times { |n|
- icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
- dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
+ icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_bytes, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
+ dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_bytes, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
sequencer = Sequencer.new("Sequencer_"+n.to_s, icache, dcache)
iface_ports << sequencer
+ if protocol == "MOESI_CMP_token"
+ net_ports << MOESI_CMP_token_L1CacheController.new("L1CacheController_"+n.to_s,
+ "L1Cache",
+ icache, dcache,
+ sequencer,
+ num_l2_banks,
+ n_tokens)
+ end
+
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s,
"L1Cache",
@@ -75,7 +95,14 @@ num_cores.times { |n|
end
}
num_l2_banks.times { |n|
- cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_kb/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
+ cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_bytes/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
+ if protocol == "MOESI_CMP_token"
+ net_ports << MOESI_CMP_token_L2CacheController.new("L2CacheController_"+n.to_s,
+ "L2Cache",
+ cache,
+ n_tokens)
+ end
+
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
"L2Cache",
@@ -93,6 +120,14 @@ num_l2_banks.times { |n|
num_memories.times { |n|
directory = DirectoryMemory.new("DirectoryMemory_"+n.to_s, memory_size_mb/num_memories)
memory_control = MemoryControl.new("MemoryControl_"+n.to_s)
+ if protocol == "MOESI_CMP_token"
+ net_ports << MOESI_CMP_token_DirectoryController.new("DirectoryController_"+n.to_s,
+ "Directory",
+ directory,
+ memory_control,
+ num_l2_banks)
+ end
+
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s,
"Directory",
@@ -111,6 +146,12 @@ num_memories.times { |n|
num_dma.times { |n|
dma_sequencer = DMASequencer.new("DMASequencer_"+n.to_s)
iface_ports << dma_sequencer
+ if protocol == "MOESI_CMP_token"
+ net_ports << MOESI_CMP_token_DMAController.new("DMAController_"+n.to_s,
+ "DMA",
+ dma_sequencer)
+ end
+
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_DMAController.new("DMAController_"+n.to_s,
"DMA",
diff --git a/src/mem/ruby/config/cfg.rb b/src/mem/ruby/config/cfg.rb
index f2564e1d3..c470ca92f 100644
--- a/src/mem/ruby/config/cfg.rb
+++ b/src/mem/ruby/config/cfg.rb
@@ -538,7 +538,6 @@ class MemoryControl < LibRubyObject
end
end
-
class Sequencer < IfacePort
def cppClassName()
diff --git a/src/mem/ruby/config/defaults.rb b/src/mem/ruby/config/defaults.rb
index bb054ec4e..f338f4e3f 100644
--- a/src/mem/ruby/config/defaults.rb
+++ b/src/mem/ruby/config/defaults.rb
@@ -167,6 +167,33 @@ class MOESI_CMP_directory_DMAController < DMAController
default_param :response_latency, Integer, 6
end
+## MOESI_CMP_token protocol
+
+class MOESI_CMP_token_L1CacheController < L1CacheController
+ default_param :l1_request_latency, Integer, 2
+ default_param :l1_response_latency, Integer, 2
+ default_param :retry_threshold, Integer, 1
+ default_param :fixed_timeout_latency, Integer, 300
+ default_param :dynamic_timeout_enabled, Boolean, true
+end
+
+class MOESI_CMP_token_L2CacheController < CacheController
+ default_param :l2_request_latency, Integer, 2
+ default_param :l2_response_latency, Integer, 2
+ default_param :filtering_enabled, Boolean, true
+end
+
+class MOESI_CMP_token_DirectoryController < DirectoryController
+ default_param :directory_latency, Integer, 6
+ default_param :distributed_persistent, Boolean, true
+ default_param :fixed_timeout_latency, Integer, 300
+end
+
+class MOESI_CMP_token_DMAController < DMAController
+ default_param :request_latency, Integer, 6
+ default_param :response_latency, Integer, 6
+end
+
## MOESI_hammer protocol
class MOESI_hammer_CacheController < L1CacheController
diff --git a/src/mem/ruby/system/PersistentTable.cc b/src/mem/ruby/system/PersistentTable.cc
index 1e056f6e5..58b67ea60 100644
--- a/src/mem/ruby/system/PersistentTable.cc
+++ b/src/mem/ruby/system/PersistentTable.cc
@@ -27,44 +27,33 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "PersistentTable.hh"
-#include "NetDest.h"
-#include "Map.h"
-#include "Address.h"
-#include "AbstractChip.h"
-#include "util.h"
+#include "mem/ruby/system/PersistentTable.hh"
+#include "mem/gems_common/util.hh"
// randomize so that handoffs are not locality-aware
// int persistent_randomize[] = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15};
// int persistent_randomize[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-class PersistentTableEntry {
-public:
- NetDest m_starving;
- NetDest m_marked;
- NetDest m_request_to_write;
-};
-
-PersistentTable::PersistentTable(AbstractChip* chip_ptr, int version)
+PersistentTable::PersistentTable()
{
- m_chip_ptr = chip_ptr;
m_map_ptr = new Map<Address, PersistentTableEntry>;
- m_version = version;
}
PersistentTable::~PersistentTable()
{
delete m_map_ptr;
m_map_ptr = NULL;
- m_chip_ptr = NULL;
}
-void PersistentTable::persistentRequestLock(const Address& address, MachineID locker, AccessType type)
+void PersistentTable::persistentRequestLock(const Address& address,
+ MachineID locker,
+ AccessType type)
{
// if (locker == m_chip_ptr->getID() )
- // cout << "Chip " << m_chip_ptr->getID() << ": " << llocker << " requesting lock for " << address << endl;
+ // cout << "Chip " << m_chip_ptr->getID() << ": " << llocker
+ // << " requesting lock for " << address << endl;
// MachineID locker = (MachineID) persistent_randomize[llocker];
@@ -79,7 +68,11 @@ void PersistentTable::persistentRequestLock(const Address& address, MachineID lo
m_map_ptr->add(address, entry);
} else {
PersistentTableEntry& entry = m_map_ptr->lookup(address);
- assert(!(entry.m_starving.isElement(locker))); // Make sure we're not already in the locked set
+
+ //
+ // Make sure we're not already in the locked set
+ //
+ assert(!(entry.m_starving.isElement(locker)));
entry.m_starving.add(locker);
if (type == AccessType_Write) {
@@ -89,17 +82,23 @@ void PersistentTable::persistentRequestLock(const Address& address, MachineID lo
}
}
-void PersistentTable::persistentRequestUnlock(const Address& address, MachineID unlocker)
+void PersistentTable::persistentRequestUnlock(const Address& address,
+ MachineID unlocker)
{
// if (unlocker == m_chip_ptr->getID() )
- // cout << "Chip " << m_chip_ptr->getID() << ": " << uunlocker << " requesting unlock for " << address << endl;
+ // cout << "Chip " << m_chip_ptr->getID() << ": " << uunlocker
+ // << " requesting unlock for " << address << endl;
// MachineID unlocker = (MachineID) persistent_randomize[uunlocker];
assert(address == line_address(address));
assert(m_map_ptr->exist(address));
PersistentTableEntry& entry = m_map_ptr->lookup(address);
- assert(entry.m_starving.isElement(unlocker)); // Make sure we're in the locked set
+
+ //
+ // Make sure we're in the locked set
+ //
+ assert(entry.m_starving.isElement(unlocker));
assert(entry.m_marked.isSubset(entry.m_starving));
entry.m_starving.remove(unlocker);
entry.m_marked.remove(unlocker);
@@ -113,13 +112,20 @@ void PersistentTable::persistentRequestUnlock(const Address& address, MachineID
}
}
-bool PersistentTable::okToIssueStarving(const Address& address) const
+bool PersistentTable::okToIssueStarving(const Address& address,
+ MachineID machId) const
{
assert(address == line_address(address));
if (!m_map_ptr->exist(address)) {
- return true; // No entry present
- } else if (m_map_ptr->lookup(address).m_starving.isElement( (MachineID) {MachineType_L1Cache, m_version})) {
- return false; // We can't issue another lockdown until are previous unlock has occurred
+ //
+ // No entry present
+ //
+ return true;
+ } else if (m_map_ptr->lookup(address).m_starving.isElement(machId)) {
+ //
+ // We can't issue another lockdown until are previous unlock has occurred
+ //
+ return false;
} else {
return (m_map_ptr->lookup(address).m_marked.isEmpty());
}
@@ -130,9 +136,7 @@ MachineID PersistentTable::findSmallest(const Address& address) const
assert(address == line_address(address));
assert(m_map_ptr->exist(address));
const PersistentTableEntry& entry = m_map_ptr->lookup(address);
- // cout << "Node " << m_chip_ptr->getID() << " returning " << persistent_randomize[entry.m_starving.smallestElement()] << " for findSmallest(" << address << ")" << endl;
- // return (MachineID) persistent_randomize[entry.m_starving.smallestElement()];
- return (MachineID) { MachineType_L1Cache, entry.m_starving.smallestElement() };
+ return entry.m_starving.smallestElement();
}
AccessType PersistentTable::typeOfSmallest(const Address& address) const
@@ -140,7 +144,7 @@ AccessType PersistentTable::typeOfSmallest(const Address& address) const
assert(address == line_address(address));
assert(m_map_ptr->exist(address));
const PersistentTableEntry& entry = m_map_ptr->lookup(address);
- if (entry.m_request_to_write.isElement((MachineID) {MachineType_L1Cache, entry.m_starving.smallestElement()})) {
+ if (entry.m_request_to_write.isElement(entry.m_starving.smallestElement())) {
return AccessType_Write;
} else {
return AccessType_Read;
@@ -152,8 +156,16 @@ void PersistentTable::markEntries(const Address& address)
assert(address == line_address(address));
if (m_map_ptr->exist(address)) {
PersistentTableEntry& entry = m_map_ptr->lookup(address);
- assert(entry.m_marked.isEmpty()); // None should be marked
- entry.m_marked = entry.m_starving; // Mark all the nodes currently in the table
+
+ //
+ // None should be marked
+ //
+ assert(entry.m_marked.isEmpty());
+
+ //
+ // Mark all the nodes currently in the table
+ //
+ entry.m_marked = entry.m_starving;
}
}
@@ -177,7 +189,6 @@ int PersistentTable::countStarvingForAddress(const Address& address) const
int PersistentTable::countReadStarvingForAddress(const Address& address) const
{
- int count = 0;
if (m_map_ptr->exist(address)) {
PersistentTableEntry& entry = m_map_ptr->lookup(address);
return (entry.m_starving.count() - entry.m_request_to_write.count());
@@ -187,4 +198,7 @@ int PersistentTable::countReadStarvingForAddress(const Address& address) const
}
}
+void PersistentTable::print(ostream& out) const
+{
+}
diff --git a/src/mem/ruby/system/PersistentTable.hh b/src/mem/ruby/system/PersistentTable.hh
index ab000843d..8cbb48817 100644
--- a/src/mem/ruby/system/PersistentTable.hh
+++ b/src/mem/ruby/system/PersistentTable.hh
@@ -30,20 +30,26 @@
#ifndef PersistentTable_H
#define PersistentTable_H
-#include "Global.h"
-#include "MachineID.h"
-#include "AccessType.h"
+#include "mem/ruby/common/Global.hh"
+#include "mem/gems_common/Map.hh"
+#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/system/MachineID.hh"
+#include "mem/protocol/AccessType.hh"
+#include "mem/ruby/common/NetDest.hh"
-class AbstractChip;
+class PersistentTableEntry {
+public:
+ void print(ostream& out) const {}
-template <class KEY_TYPE, class VALUE_TYPE> class Map;
-class Address;
-class PersistentTableEntry;
+ NetDest m_starving;
+ NetDest m_marked;
+ NetDest m_request_to_write;
+};
class PersistentTable {
public:
// Constructors
- PersistentTable(AbstractChip* chip_ptr, int version);
+ PersistentTable();
// Destructor
~PersistentTable();
@@ -51,7 +57,7 @@ public:
// Public Methods
void persistentRequestLock(const Address& address, MachineID locker, AccessType type);
void persistentRequestUnlock(const Address& address, MachineID unlocker);
- bool okToIssueStarving(const Address& address) const;
+ bool okToIssueStarving(const Address& address, MachineID machID) const;
MachineID findSmallest(const Address& address) const;
AccessType typeOfSmallest(const Address& address) const;
void markEntries(const Address& address);
@@ -71,17 +77,12 @@ private:
// Data Members (m_prefix)
Map<Address, PersistentTableEntry>* m_map_ptr;
- AbstractChip* m_chip_ptr;
- int m_version;
};
-// Output operator declaration
-ostream& operator<<(ostream& out, const PersistentTable& obj);
-
// ******************* Definitions *******************
// Output operator definition
-extern inline
+extern inline
ostream& operator<<(ostream& out, const PersistentTable& obj)
{
obj.print(out);
@@ -89,4 +90,13 @@ ostream& operator<<(ostream& out, const PersistentTable& obj)
return out;
}
+// Output operator definition
+extern inline
+ostream& operator<<(ostream& out, const PersistentTableEntry& obj)
+{
+ obj.print(out);
+ out << flush;
+ return out;
+}
+
#endif //PersistentTable_H
diff --git a/src/mem/ruby/system/SConscript b/src/mem/ruby/system/SConscript
index 496fce2fd..4ca1af114 100644
--- a/src/mem/ruby/system/SConscript
+++ b/src/mem/ruby/system/SConscript
@@ -38,6 +38,7 @@ Source('DirectoryMemory.cc')
Source('CacheMemory.cc')
Source('MemoryControl.cc')
Source('MemoryNode.cc')
+Source('PersistentTable.cc')
Source('RubyPort.cc')
Source('Sequencer.cc', Werror=False)
Source('System.cc')