summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--configs/ruby/MOESI_hammer.py7
-rw-r--r--src/mem/protocol/MOESI_hammer-cache.sm20
-rw-r--r--src/mem/protocol/MOESI_hammer-dir.sm262
-rw-r--r--src/mem/protocol/MOESI_hammer-msg.sm4
-rw-r--r--src/mem/protocol/RubySlicc_Exports.sm1
-rw-r--r--src/mem/ruby/network/Network.cc1
6 files changed, 244 insertions, 51 deletions
diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py
index 17c1b20c4..df1c1d3e7 100644
--- a/configs/ruby/MOESI_hammer.py
+++ b/configs/ruby/MOESI_hammer.py
@@ -55,7 +55,9 @@ def define_options(parser):
help="allow migratory sharing for atomic only accessed blocks")
parser.add_option("--pf-on", action="store_true",
help="Hammer: enable Probe Filter")
-
+ parser.add_option("--dir-on", action="store_true",
+ help="Hammer: enable Full-bit Directory")
+
def create_system(options, system, piobus, dma_devices):
if buildEnv['PROTOCOL'] != 'MOESI_hammer':
@@ -165,7 +167,8 @@ def create_system(options, system, piobus, dma_devices):
options.map_levels),
probeFilter = pf,
memBuffer = mem_cntrl,
- probe_filter_enabled = options.pf_on)
+ probe_filter_enabled = options.pf_on,
+ full_bit_dir_enabled = options.dir_on)
if options.recycle_latency:
dir_cntrl.recycle_latency = options.recycle_latency
diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm
index 6739f628e..1f14db4f7 100644
--- a/src/mem/protocol/MOESI_hammer-cache.sm
+++ b/src/mem/protocol/MOESI_hammer-cache.sm
@@ -137,6 +137,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
bool Dirty, desc="Is the data dirty (different than memory)?";
int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
bool Sharers, desc="On a GetS, did we find any other sharers in the system";
+ bool AppliedSilentAcks, default="false", desc="for full-bit dir, does the pending msg count reflect the silent acks";
MachineID LastResponder, desc="last machine to send a response for this request";
MachineID CurOwner, desc="current owner of the block, used for UnblockS responses";
Time InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
@@ -526,6 +527,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
} else {
out_msg.Acks := 2;
}
+ out_msg.SilentAcks := in_msg.SilentAcks;
out_msg.MessageSize := MessageSizeType:Response_Data;
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -558,6 +560,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
} else {
out_msg.Acks := 2;
}
+ out_msg.SilentAcks := in_msg.SilentAcks;
out_msg.MessageSize := MessageSizeType:Response_Data;
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -581,6 +584,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
} else {
out_msg.Acks := 2;
}
+ out_msg.SilentAcks := in_msg.SilentAcks;
out_msg.MessageSize := MessageSizeType:Response_Data;
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -600,6 +604,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
out_msg.Dirty := cache_entry.Dirty;
DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk);
out_msg.Acks := machineCount(MachineType:L1Cache);
+ out_msg.SilentAcks := in_msg.SilentAcks;
out_msg.MessageSize := MessageSizeType:Response_Data;
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -615,6 +620,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
out_msg.Sender := machineID;
out_msg.Destination.add(in_msg.Requestor);
out_msg.Acks := 1;
+ out_msg.SilentAcks := in_msg.SilentAcks;
assert(in_msg.DirectedProbe == false);
out_msg.MessageSize := MessageSizeType:Response_Control;
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
@@ -631,6 +637,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
out_msg.Sender := machineID;
out_msg.Destination.add(in_msg.Requestor);
out_msg.Acks := 1;
+ out_msg.SilentAcks := in_msg.SilentAcks;
assert(in_msg.DirectedProbe == false);
out_msg.MessageSize := MessageSizeType:Response_Control;
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
@@ -779,9 +786,17 @@ machine(L1Cache, "AMD Hammer-like protocol")
peek(responseToCache_in, ResponseMsg) {
assert(in_msg.Acks > 0);
assert(is_valid(tbe));
+ DPRINTF(RubySlicc, "Sender = %s\n", in_msg.Sender);
+ DPRINTF(RubySlicc, "SilentAcks = %d\n", in_msg.SilentAcks);
+ if (tbe.AppliedSilentAcks == false) {
+ tbe.NumPendingMsgs := tbe.NumPendingMsgs - in_msg.SilentAcks;
+ tbe.AppliedSilentAcks := true;
+ }
DPRINTF(RubySlicc, "%d\n", tbe.NumPendingMsgs);
tbe.NumPendingMsgs := tbe.NumPendingMsgs - in_msg.Acks;
DPRINTF(RubySlicc, "%d\n", tbe.NumPendingMsgs);
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingMsgs);
+ APPEND_TRANSITION_COMMENT(in_msg.Sender);
tbe.LastResponder := in_msg.Sender;
if (tbe.InitialRequestTime != zero_time() && in_msg.InitialRequestTime != zero_time()) {
assert(tbe.InitialRequestTime == in_msg.InitialRequestTime);
@@ -844,6 +859,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") {
peek(forwardToCache_in, RequestMsg) {
+ assert(in_msg.Requestor != machineID);
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
assert(is_valid(tbe));
out_msg.Address := address;
@@ -858,6 +874,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
} else {
out_msg.Acks := 2;
}
+ out_msg.SilentAcks := in_msg.SilentAcks;
out_msg.MessageSize := MessageSizeType:Response_Data;
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -877,6 +894,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
out_msg.DataBlk := tbe.DataBlk;
out_msg.Dirty := tbe.Dirty;
out_msg.Acks := machineCount(MachineType:L1Cache);
+ out_msg.SilentAcks := in_msg.SilentAcks;
out_msg.MessageSize := MessageSizeType:Response_Data;
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -1387,7 +1405,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
n_popResponseQueue;
}
- transition(SM, Data, ISM) {
+ transition(SM, {Data, Exclusive_Data}, ISM) {
v_writeDataToCacheVerify;
m_decrementNumberOfMessages;
o_checkForCompletion;
diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm
index 8f9ce2360..1d18fd489 100644
--- a/src/mem/protocol/MOESI_hammer-dir.sm
+++ b/src/mem/protocol/MOESI_hammer-dir.sm
@@ -38,7 +38,8 @@ machine(Directory, "AMD Hammer-like protocol")
CacheMemory * probeFilter,
MemoryControl * memBuffer,
int memory_controller_latency = 2,
- bool probe_filter_enabled = false
+ bool probe_filter_enabled = false,
+ bool full_bit_dir_enabled = false
{
MessageBuffer forwardFromDir, network="To", virtual_network="3", ordered="false";
@@ -140,6 +141,7 @@ machine(Directory, "AMD Hammer-like protocol")
State PfState, desc="Directory state";
MachineID Owner, desc="Owner node";
DataBlock DataBlk, desc="data for the block";
+ Set Sharers, desc="sharing vector for full bit directory";
}
// TBE entries for DMA requests
@@ -148,6 +150,7 @@ machine(Directory, "AMD Hammer-like protocol")
State TBEState, desc="Transient State";
CoherenceResponseType ResponseType, desc="The type for the subsequent response message";
int Acks, default="0", desc="The number of acks that the waiting response represents";
+ int SilentAcks, default="0", desc="The number of silent acks associated with this transaction";
DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory";
DataBlock DataBlk, desc="The current view of system memory";
int Len, desc="...";
@@ -173,6 +176,8 @@ machine(Directory, "AMD Hammer-like protocol")
// ** OBJECTS **
+ Set fwd_set;
+
TBETable TBEs, template_hack="<Directory_TBE>";
Entry getDirectoryEntry(Address addr), return_by_ref="yes" {
@@ -191,7 +196,7 @@ machine(Directory, "AMD Hammer-like protocol")
if (is_valid(tbe)) {
return tbe.TBEState;
} else {
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
if (is_valid(pf_entry)) {
assert(pf_entry.PfState == getDirectoryEntry(addr).DirectoryState);
} else {
@@ -206,7 +211,7 @@ machine(Directory, "AMD Hammer-like protocol")
if (is_valid(tbe)) {
tbe.TBEState := state;
}
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
if (is_valid(pf_entry)) {
pf_entry.PfState := state;
}
@@ -349,7 +354,7 @@ machine(Directory, "AMD Hammer-like protocol")
if (in_msg.Type == CoherenceRequestType:PUT) {
trigger(Event:PUT, in_msg.Address, pf_entry, tbe);
} else {
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
if (is_valid(pf_entry)) {
trigger(cache_request_to_event(in_msg.Type), in_msg.Address,
pf_entry, tbe);
@@ -392,26 +397,44 @@ machine(Directory, "AMD Hammer-like protocol")
// Actions
action(r_setMRU, "\rr", desc="manually set the MRU bit for pf entry" ) {
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
assert(is_valid(cache_entry));
probeFilter.setMRU(address);
}
}
action(auno_assertUnblockerNotOwner, "auno", desc="assert unblocker not owner") {
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
assert(is_valid(cache_entry));
peek(unblockNetwork_in, ResponseMsg) {
assert(cache_entry.Owner != in_msg.Sender);
+ if (full_bit_dir_enabled) {
+ assert(cache_entry.Sharers.isElement(machineIDToNodeID(in_msg.Sender)) == false);
+ }
}
}
}
action(uo_updateOwnerIfPf, "uo", desc="update owner") {
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
assert(is_valid(cache_entry));
peek(unblockNetwork_in, ResponseMsg) {
cache_entry.Owner := in_msg.Sender;
+ if (full_bit_dir_enabled) {
+ cache_entry.Sharers.clear();
+ cache_entry.Sharers.add(machineIDToNodeID(in_msg.Sender));
+ APPEND_TRANSITION_COMMENT(cache_entry.Sharers);
+ DPRINTF(RubySlicc, "Sharers = %d\n", cache_entry.Sharers);
+ }
+ }
+ }
+ }
+
+ action(us_updateSharerIfFBD, "us", desc="update sharer if full-bit directory") {
+ if (full_bit_dir_enabled) {
+ assert(probeFilter.isTagPresent(address));
+ peek(unblockNetwork_in, ResponseMsg) {
+ cache_entry.Sharers.add(machineIDToNodeID(in_msg.Sender));
}
}
}
@@ -441,7 +464,7 @@ machine(Directory, "AMD Hammer-like protocol")
}
action(pfa_probeFilterAllocate, "pfa", desc="Allocate ProbeFilterEntry") {
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
peek(requestQueue_in, RequestMsg) {
set_cache_entry(probeFilter.allocate(address, new PfEntry));
cache_entry.Owner := in_msg.Requestor;
@@ -450,14 +473,14 @@ machine(Directory, "AMD Hammer-like protocol")
}
action(pfd_probeFilterDeallocate, "pfd", desc="Deallocate ProbeFilterEntry") {
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
probeFilter.deallocate(address);
unset_cache_entry();
}
}
action(ppfd_possibleProbeFilterDeallocate, "ppfd", desc="Deallocate ProbeFilterEntry") {
- if (probe_filter_enabled && is_valid(cache_entry)) {
+ if ((probe_filter_enabled || full_bit_dir_enabled) && is_valid(cache_entry)) {
probeFilter.deallocate(address);
unset_cache_entry();
}
@@ -495,7 +518,12 @@ machine(Directory, "AMD Hammer-like protocol")
action(pa_setPendingMsgsToAll, "pa", desc="set pending msgs to all") {
assert(is_valid(tbe));
- tbe.NumPendingMsgs := machineCount(MachineType:L1Cache);
+ if (full_bit_dir_enabled) {
+ assert(is_valid(cache_entry));
+ tbe.NumPendingMsgs := cache_entry.Sharers.count();
+ } else {
+ tbe.NumPendingMsgs := machineCount(MachineType:L1Cache);
+ }
}
action(po_setPendingMsgsToOne, "po", desc="set pending msgs to one") {
@@ -510,13 +538,34 @@ machine(Directory, "AMD Hammer-like protocol")
action(sa_setAcksToOne, "sa", desc="Forwarded request, set the ack amount to one") {
assert(is_valid(tbe));
- tbe.Acks := 1;
- }
+ peek(requestQueue_in, RequestMsg) {
+ if (full_bit_dir_enabled) {
+ assert(is_valid(cache_entry));
+ //
+ // If we are using the full-bit directory and no sharers exists beyond
+ // the requestor, then we must set the ack number to all, not one
+ //
+ fwd_set := cache_entry.Sharers;
+ fwd_set.remove(machineIDToNodeID(in_msg.Requestor));
+ if (fwd_set.count() > 0) {
+ tbe.Acks := 1;
+ tbe.SilentAcks := machineCount(MachineType:L1Cache) - fwd_set.count();
+ tbe.SilentAcks := tbe.SilentAcks - 1;
+ } else {
+ tbe.Acks := machineCount(MachineType:L1Cache);
+ tbe.SilentAcks := 0;
+ }
+ } else {
+ tbe.Acks := 1;
+ }
+ }
+ }
action(saa_setAcksToAllIfPF, "saa", desc="Non-forwarded request, set the ack amount to all") {
assert(is_valid(tbe));
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
tbe.Acks := machineCount(MachineType:L1Cache);
+ tbe.SilentAcks := 0;
} else {
tbe.Acks := 1;
}
@@ -590,7 +639,7 @@ machine(Directory, "AMD Hammer-like protocol")
}
action(spa_setPendingAcksToZeroIfPF, "spa", desc="if probe filter, no need to wait for acks") {
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
assert(is_valid(tbe));
tbe.NumPendingMsgs := 0;
}
@@ -599,7 +648,7 @@ machine(Directory, "AMD Hammer-like protocol")
action(sc_signalCompletionIfPF, "sc", desc="indicate that we should skip waiting for cpu acks") {
assert(is_valid(tbe));
if (tbe.NumPendingMsgs == 0) {
- assert(probe_filter_enabled);
+ assert(probe_filter_enabled || full_bit_dir_enabled);
enqueue(triggerQueue_out, TriggerMsg) {
out_msg.Address := address;
out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS;
@@ -619,6 +668,7 @@ machine(Directory, "AMD Hammer-like protocol")
DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk);
out_msg.Dirty := false; // By definition, the block is now clean
out_msg.Acks := tbe.Acks;
+ out_msg.SilentAcks := tbe.SilentAcks;
DPRINTF(RubySlicc, "%d\n", out_msg.Acks);
assert(out_msg.Acks > 0);
out_msg.MessageSize := MessageSizeType:Response_Data;
@@ -683,7 +733,17 @@ machine(Directory, "AMD Hammer-like protocol")
action(r_recordDataInTBE, "rt", desc="Record Data in TBE") {
peek(requestQueue_in, RequestMsg) {
assert(is_valid(tbe));
- tbe.ResponseType := CoherenceResponseType:DATA;
+ if (full_bit_dir_enabled) {
+ fwd_set := cache_entry.Sharers;
+ fwd_set.remove(machineIDToNodeID(in_msg.Requestor));
+ if (fwd_set.count() > 0) {
+ tbe.ResponseType := CoherenceResponseType:DATA;
+ } else {
+ tbe.ResponseType := CoherenceResponseType:DATA_EXCLUSIVE;
+ }
+ } else {
+ tbe.ResponseType := CoherenceResponseType:DATA;
+ }
}
}
@@ -736,16 +796,37 @@ machine(Directory, "AMD Hammer-like protocol")
action(fn_forwardRequestIfNecessary, "fn", desc="Forward requests if necessary") {
assert(is_valid(tbe));
if ((machineCount(MachineType:L1Cache) > 1) && (tbe.Acks <= 1)) {
- peek(requestQueue_in, RequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
- out_msg.Address := address;
- out_msg.Type := in_msg.Type;
- out_msg.Requestor := in_msg.Requestor;
- out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
- out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
- out_msg.MessageSize := MessageSizeType:Broadcast_Control;
- out_msg.InitialRequestTime := in_msg.InitialRequestTime;
- out_msg.ForwardRequestTime := get_time();
+ if (full_bit_dir_enabled) {
+ assert(is_valid(cache_entry));
+ peek(requestQueue_in, RequestMsg) {
+ fwd_set := cache_entry.Sharers;
+ fwd_set.remove(machineIDToNodeID(in_msg.Requestor));
+ if (fwd_set.count() > 0) {
+ enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+ out_msg.Address := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.setNetDest(MachineType:L1Cache, fwd_set);
+ out_msg.MessageSize := MessageSizeType:Multicast_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := get_time();
+ assert(tbe.SilentAcks > 0);
+ out_msg.SilentAcks := tbe.SilentAcks;
+ }
+ }
+ }
+ } else {
+ peek(requestQueue_in, RequestMsg) {
+ enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+ out_msg.Address := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
+ out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
+ out_msg.MessageSize := MessageSizeType:Broadcast_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := get_time();
+ }
}
}
}
@@ -753,12 +834,25 @@ machine(Directory, "AMD Hammer-like protocol")
action(ia_invalidateAllRequest, "ia", desc="invalidate all copies") {
if (machineCount(MachineType:L1Cache) > 1) {
- enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
- out_msg.Address := address;
- out_msg.Type := CoherenceRequestType:INV;
- out_msg.Requestor := machineID;
- out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
- out_msg.MessageSize := MessageSizeType:Broadcast_Control;
+ if (full_bit_dir_enabled) {
+ assert(cache_entry.Sharers.count() > 0);
+ peek(requestQueue_in, RequestMsg) {
+ enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceRequestType:INV;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.setNetDest(MachineType:L1Cache, cache_entry.Sharers);
+ out_msg.MessageSize := MessageSizeType:Multicast_Control;
+ }
+ }
+ } else {
+ enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceRequestType:INV;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
+ out_msg.MessageSize := MessageSizeType:Broadcast_Control;
+ }
}
}
}
@@ -780,15 +874,33 @@ machine(Directory, "AMD Hammer-like protocol")
action(fb_forwardRequestBcast, "fb", desc="Forward requests to all nodes") {
if (machineCount(MachineType:L1Cache) > 1) {
peek(requestQueue_in, RequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
- out_msg.Address := address;
- out_msg.Type := in_msg.Type;
- out_msg.Requestor := in_msg.Requestor;
- out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
- out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
- out_msg.MessageSize := MessageSizeType:Broadcast_Control;
- out_msg.InitialRequestTime := in_msg.InitialRequestTime;
- out_msg.ForwardRequestTime := get_time();
+ if (full_bit_dir_enabled) {
+ fwd_set := cache_entry.Sharers;
+ fwd_set.remove(machineIDToNodeID(in_msg.Requestor));
+ if (fwd_set.count() > 0) {
+ enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+ out_msg.Address := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.setNetDest(MachineType:L1Cache, fwd_set);
+ out_msg.MessageSize := MessageSizeType:Multicast_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := get_time();
+ out_msg.SilentAcks := machineCount(MachineType:L1Cache) - fwd_set.count();
+ out_msg.SilentAcks := out_msg.SilentAcks - 1;
+ }
+ }
+ } else {
+ enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+ out_msg.Address := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
+ out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
+ out_msg.MessageSize := MessageSizeType:Broadcast_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := get_time();
+ }
}
}
}
@@ -820,7 +932,7 @@ machine(Directory, "AMD Hammer-like protocol")
action(fc_forwardRequestConditionalOwner, "fc", desc="Forward request to one or more nodes") {
assert(machineCount(MachineType:L1Cache) > 1);
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
peek(requestQueue_in, RequestMsg) {
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
assert(is_valid(cache_entry));
@@ -979,7 +1091,7 @@ machine(Directory, "AMD Hammer-like protocol")
}
action(ano_assertNotOwner, "ano", desc="Assert that request is not current owner") {
- if (probe_filter_enabled) {
+ if (probe_filter_enabled || full_bit_dir_enabled) {
peek(requestQueue_in, RequestMsg) {
assert(is_valid(cache_entry));
assert(cache_entry.Owner != in_msg.Requestor);
@@ -987,6 +1099,32 @@ machine(Directory, "AMD Hammer-like protocol")
}
}
+ action(ans_assertNotSharer, "ans", desc="Assert that request is not a current sharer") {
+ if (full_bit_dir_enabled) {
+ peek(requestQueue_in, RequestMsg) {
+ assert(cache_entry.Sharers.isElement(machineIDToNodeID(in_msg.Requestor)) == false);
+ }
+ }
+ }
+
+ action(rs_removeSharer, "s", desc="remove current sharer") {
+ if (full_bit_dir_enabled) {
+ peek(unblockNetwork_in, ResponseMsg) {
+ assert(cache_entry.Sharers.isElement(machineIDToNodeID(in_msg.Sender)));
+ cache_entry.Sharers.remove(machineIDToNodeID(in_msg.Sender));
+ }
+ }
+ }
+
+ action(cs_clearSharers, "cs", desc="clear current sharers") {
+ if (full_bit_dir_enabled) {
+ peek(requestQueue_in, RequestMsg) {
+ cache_entry.Sharers.clear();
+ cache_entry.Sharers.add(machineIDToNodeID(in_msg.Requestor));
+ }
+ }
+ }
+
action(l_queueMemoryWBRequest, "lq", desc="Write PUTX data to memory") {
peek(unblockNetwork_in, ResponseMsg) {
enqueue(memQueue_out, MemoryMsg, latency="1") {
@@ -1077,6 +1215,7 @@ machine(Directory, "AMD Hammer-like protocol")
sa_setAcksToOne;
qf_queueMemoryFetchRequest;
fb_forwardRequestBcast;
+ cs_clearSharers;
i_popIncomingRequestQueue;
}
@@ -1139,6 +1278,7 @@ machine(Directory, "AMD Hammer-like protocol")
transition(NX, GETX, NO_B) {
r_setMRU;
fb_forwardRequestBcast;
+ cs_clearSharers;
i_popIncomingRequestQueue;
}
@@ -1147,12 +1287,14 @@ machine(Directory, "AMD Hammer-like protocol")
r_setMRU;
ano_assertNotOwner;
fc_forwardRequestConditionalOwner;
+ cs_clearSharers;
i_popIncomingRequestQueue;
}
transition(S, GETX, NO_B) {
r_setMRU;
fb_forwardRequestBcast;
+ cs_clearSharers;
i_popIncomingRequestQueue;
}
@@ -1163,7 +1305,15 @@ machine(Directory, "AMD Hammer-like protocol")
i_popIncomingRequestQueue;
}
- transition({NX, NO}, GETS, NO_B) {
+ transition(NO, GETS, NO_B) {
+ r_setMRU;
+ ano_assertNotOwner;
+ ans_assertNotSharer;
+ fc_forwardRequestConditionalOwner;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(NX, GETS, NO_B) {
r_setMRU;
ano_assertNotOwner;
fc_forwardRequestConditionalOwner;
@@ -1211,7 +1361,7 @@ machine(Directory, "AMD Hammer-like protocol")
z_stallAndWaitRequest;
}
- transition({NO_B, NO_B_S, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
+ transition({NO_B_X, NO_B, NO_B_S, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W,
NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R},
{DMA_READ, DMA_WRITE}) {
@@ -1232,17 +1382,20 @@ machine(Directory, "AMD Hammer-like protocol")
// unblock responses
transition({NO_B, NO_B_X}, UnblockS, NX) {
+ us_updateSharerIfFBD;
k_wakeUpDependents;
j_popIncomingUnblockQueue;
}
transition({NO_B, NO_B_X}, UnblockM, NO) {
uo_updateOwnerIfPf;
+ us_updateSharerIfFBD;
k_wakeUpDependents;
j_popIncomingUnblockQueue;
}
transition(NO_B_S, UnblockS, NO_B_S_W) {
+ us_updateSharerIfFBD;
fr_forwardMergeReadRequestsToOwner;
sp_setPendingMsgsToMergedSharers;
j_popIncomingUnblockQueue;
@@ -1256,6 +1409,7 @@ machine(Directory, "AMD Hammer-like protocol")
}
transition(NO_B_S_W, UnblockS) {
+ us_updateSharerIfFBD;
mu_decrementNumberOfUnblocks;
os_checkForMergedGetSCompletion;
j_popIncomingUnblockQueue;
@@ -1268,6 +1422,14 @@ machine(Directory, "AMD Hammer-like protocol")
}
transition(O_B, UnblockS, O) {
+ us_updateSharerIfFBD;
+ k_wakeUpDependents;
+ j_popIncomingUnblockQueue;
+ }
+
+ transition(O_B, UnblockM, NO) {
+ us_updateSharerIfFBD;
+ uo_updateOwnerIfPf;
k_wakeUpDependents;
j_popIncomingUnblockQueue;
}
@@ -1505,10 +1667,12 @@ machine(Directory, "AMD Hammer-like protocol")
}
transition(NO_B_W, UnblockS, NO_W) {
+ us_updateSharerIfFBD;
j_popIncomingUnblockQueue;
}
transition(O_B_W, UnblockS, O_W) {
+ us_updateSharerIfFBD;
j_popIncomingUnblockQueue;
}
@@ -1527,12 +1691,14 @@ machine(Directory, "AMD Hammer-like protocol")
// WB State Transistions
transition(WB, Writeback_Dirty, WB_O_W) {
l_writeDataToMemory;
+ rs_removeSharer;
l_queueMemoryWBRequest;
j_popIncomingUnblockQueue;
}
transition(WB, Writeback_Exclusive_Dirty, WB_E_W) {
l_writeDataToMemory;
+ rs_removeSharer;
l_queueMemoryWBRequest;
j_popIncomingUnblockQueue;
}
@@ -1550,18 +1716,20 @@ machine(Directory, "AMD Hammer-like protocol")
transition(WB, Writeback_Clean, O) {
ll_checkIncomingWriteback;
+ rs_removeSharer;
k_wakeUpDependents;
j_popIncomingUnblockQueue;
}
transition(WB, Writeback_Exclusive_Clean, E) {
ll_checkIncomingWriteback;
+ rs_removeSharer;
pfd_probeFilterDeallocate;
k_wakeUpDependents;
j_popIncomingUnblockQueue;
}
- transition(WB, Unblock, NO) {
+ transition(WB, Unblock, NX) {
auno_assertUnblockerNotOwner;
k_wakeUpDependents;
j_popIncomingUnblockQueue;
diff --git a/src/mem/protocol/MOESI_hammer-msg.sm b/src/mem/protocol/MOESI_hammer-msg.sm
index c90c8a53c..063cc91ee 100644
--- a/src/mem/protocol/MOESI_hammer-msg.sm
+++ b/src/mem/protocol/MOESI_hammer-msg.sm
@@ -83,6 +83,7 @@ structure(RequestMsg, desc="...", interface="NetworkMessage") {
bool DirectedProbe, default="false", desc="probe filter directed probe";
Time InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
Time ForwardRequestTime, default="0", desc="time the dir forwarded the request";
+ int SilentAcks, default="0", desc="silent acks from the full-bit directory";
}
// ResponseMsg (and also unblock requests)
@@ -94,10 +95,11 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") {
NetDest Destination, desc="Node to whom the data is sent";
DataBlock DataBlk, desc="data for the cache line";
bool Dirty, desc="Is the data dirty (different than memory)?";
- int Acks, desc="How many messages this counts as";
+ int Acks, default="0", desc="How many messages this counts as";
MessageSizeType MessageSize, desc="size category of the message";
Time InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
Time ForwardRequestTime, default="0", desc="time the dir forwarded the request";
+ int SilentAcks, default="0", desc="silent acks from the full-bit directory";
}
enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") {
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index 574307341..2799be55d 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -173,6 +173,7 @@ enumeration(MessageSizeType, default="MessageSizeType_Undefined", desc="...") {
Writeback_Data, desc="Writeback data";
Writeback_Control, desc="Writeback control";
Broadcast_Control, desc="Broadcast control";
+ Multicast_Control, desc="Multicast control";
Forwarded_Control, desc="Forwarded control";
Invalidate_Control, desc="Invalidate control";
Unblock_Control, desc="Unblock control";
diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc
index 3e866907f..34ee18fea 100644
--- a/src/mem/ruby/network/Network.cc
+++ b/src/mem/ruby/network/Network.cc
@@ -73,6 +73,7 @@ Network::MessageSizeType_to_int(MessageSizeType size_type)
case MessageSizeType_Response_Control:
case MessageSizeType_Writeback_Control:
case MessageSizeType_Broadcast_Control:
+ case MessageSizeType_Multicast_Control:
case MessageSizeType_Forwarded_Control:
case MessageSizeType_Invalidate_Control:
case MessageSizeType_Unblock_Control: