-rw-r--r--   configs/ruby/MOESI_hammer.py             |   7
-rw-r--r--   src/mem/protocol/MOESI_hammer-cache.sm   |  20
-rw-r--r--   src/mem/protocol/MOESI_hammer-dir.sm     | 262
-rw-r--r--   src/mem/protocol/MOESI_hammer-msg.sm     |   4
-rw-r--r--   src/mem/protocol/RubySlicc_Exports.sm    |   1
-rw-r--r--   src/mem/ruby/network/Network.cc          |   1
6 files changed, 244 insertions, 51 deletions
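The configuration side of the patch is small: it adds a --dir-on switch next to the existing --pf-on probe-filter switch and threads one new full_bit_dir_enabled parameter into the Hammer directory controller. The plain-Python sketch below shows how that wiring is expected to be used; only the two option names and the two *_enabled parameter names come from the patch, while the helper function, the returned parameter dictionary, and the mutual-exclusion assert are illustrative assumptions.

# Sketch (runnable plain Python): how the new flag is meant to reach the
# directory controller.  MOESI_hammer-dir.sm guards its bookkeeping with
# "probe_filter_enabled || full_bit_dir_enabled", so the two modes are treated
# here as alternatives; the assert is an assumption, not part of the patch.
import optparse

def hammer_dir_params(options):
    assert not (options.pf_on and options.dir_on)
    return dict(probe_filter_enabled=options.pf_on,
                full_bit_dir_enabled=options.dir_on)

parser = optparse.OptionParser()
parser.add_option("--pf-on", action="store_true", default=False,
                  help="Hammer: enable Probe Filter")
parser.add_option("--dir-on", action="store_true", default=False,
                  help="Hammer: enable Full-bit Directory")
opts, _ = parser.parse_args(["--dir-on"])
print(hammer_dir_params(opts))  # {'probe_filter_enabled': False, 'full_bit_dir_enabled': True}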
diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py
index 17c1b20c4..df1c1d3e7 100644
--- a/configs/ruby/MOESI_hammer.py
+++ b/configs/ruby/MOESI_hammer.py
@@ -55,7 +55,9 @@ def define_options(parser):
          help="allow migratory sharing for atomic only accessed blocks")
     parser.add_option("--pf-on", action="store_true",
                       help="Hammer: enable Probe Filter")
-
+    parser.add_option("--dir-on", action="store_true",
+                      help="Hammer: enable Full-bit Directory")
+
 def create_system(options, system, piobus, dma_devices):
 
     if buildEnv['PROTOCOL'] != 'MOESI_hammer':
@@ -165,7 +167,8 @@ def create_system(options, system, piobus, dma_devices):
                                               options.map_levels),
                                       probeFilter = pf,
                                       memBuffer = mem_cntrl,
-                                      probe_filter_enabled = options.pf_on)
+                                      probe_filter_enabled = options.pf_on,
+                                      full_bit_dir_enabled = options.dir_on)
 
         if options.recycle_latency:
             dir_cntrl.recycle_latency = options.recycle_latency
diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm
index 6739f628e..1f14db4f7 100644
--- a/src/mem/protocol/MOESI_hammer-cache.sm
+++ b/src/mem/protocol/MOESI_hammer-cache.sm
@@ -137,6 +137,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     bool Dirty, desc="Is the data dirty (different than memory)?";
     int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
     bool Sharers, desc="On a GetS, did we find any other sharers in the system";
+    bool AppliedSilentAcks, default="false", desc="for full-bit dir, does the pending msg count reflect the silent acks";
     MachineID LastResponder, desc="last machine to send a response for this request";
     MachineID CurOwner, desc="current owner of the block, used for UnblockS responses";
     Time InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
@@ -526,6 +527,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
         } else {
           out_msg.Acks := 2;
         }
+        out_msg.SilentAcks := in_msg.SilentAcks;
         out_msg.MessageSize := MessageSizeType:Response_Data;
         out_msg.InitialRequestTime := in_msg.InitialRequestTime;
         out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -558,6 +560,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
         } else {
           out_msg.Acks := 2;
         }
+        out_msg.SilentAcks := in_msg.SilentAcks;
         out_msg.MessageSize := MessageSizeType:Response_Data;
         out_msg.InitialRequestTime := in_msg.InitialRequestTime;
         out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -581,6 +584,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
         } else {
           out_msg.Acks := 2;
         }
+        out_msg.SilentAcks := in_msg.SilentAcks;
         out_msg.MessageSize := MessageSizeType:Response_Data;
         out_msg.InitialRequestTime := in_msg.InitialRequestTime;
         out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -600,6 +604,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
         out_msg.Dirty := cache_entry.Dirty;
         DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk);
         out_msg.Acks := machineCount(MachineType:L1Cache);
+        out_msg.SilentAcks := in_msg.SilentAcks;
         out_msg.MessageSize := MessageSizeType:Response_Data;
         out_msg.InitialRequestTime := in_msg.InitialRequestTime;
         out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -615,6 +620,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.Acks := 1;
+        out_msg.SilentAcks := in_msg.SilentAcks;
         assert(in_msg.DirectedProbe == false);
         out_msg.MessageSize := MessageSizeType:Response_Control;
         out_msg.InitialRequestTime := in_msg.InitialRequestTime;
@@ -631,6 +637,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.Acks := 1;
+        out_msg.SilentAcks := in_msg.SilentAcks;
         assert(in_msg.DirectedProbe == false);
         out_msg.MessageSize := MessageSizeType:Response_Control;
         out_msg.InitialRequestTime := in_msg.InitialRequestTime;
@@ -779,9 +786,17 @@ machine(L1Cache, "AMD Hammer-like protocol")
     peek(responseToCache_in, ResponseMsg) {
       assert(in_msg.Acks > 0);
       assert(is_valid(tbe));
+      DPRINTF(RubySlicc, "Sender = %s\n", in_msg.Sender);
+      DPRINTF(RubySlicc, "SilentAcks = %d\n", in_msg.SilentAcks);
+      if (tbe.AppliedSilentAcks == false) {
+        tbe.NumPendingMsgs := tbe.NumPendingMsgs - in_msg.SilentAcks;
+        tbe.AppliedSilentAcks := true;
+      }
       DPRINTF(RubySlicc, "%d\n", tbe.NumPendingMsgs);
       tbe.NumPendingMsgs := tbe.NumPendingMsgs - in_msg.Acks;
       DPRINTF(RubySlicc, "%d\n", tbe.NumPendingMsgs);
+      APPEND_TRANSITION_COMMENT(tbe.NumPendingMsgs);
+      APPEND_TRANSITION_COMMENT(in_msg.Sender);
       tbe.LastResponder := in_msg.Sender;
       if (tbe.InitialRequestTime != zero_time() && in_msg.InitialRequestTime != zero_time()) {
         assert(tbe.InitialRequestTime == in_msg.InitialRequestTime);
@@ -844,6 +859,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") {
     peek(forwardToCache_in, RequestMsg) {
+      assert(in_msg.Requestor != machineID);
       enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
         assert(is_valid(tbe));
         out_msg.Address := address;
@@ -858,6 +874,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
         } else {
           out_msg.Acks := 2;
         }
+        out_msg.SilentAcks := in_msg.SilentAcks;
         out_msg.MessageSize := MessageSizeType:Response_Data;
         out_msg.InitialRequestTime := in_msg.InitialRequestTime;
         out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -877,6 +894,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
         out_msg.DataBlk := tbe.DataBlk;
         out_msg.Dirty := tbe.Dirty;
         out_msg.Acks := machineCount(MachineType:L1Cache);
+        out_msg.SilentAcks := in_msg.SilentAcks;
         out_msg.MessageSize := MessageSizeType:Response_Data;
         out_msg.InitialRequestTime := in_msg.InitialRequestTime;
         out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
@@ -1387,7 +1405,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     n_popResponseQueue;
   }
 
-  transition(SM, Data, ISM) {
+  transition(SM, {Data, Exclusive_Data}, ISM) {
     v_writeDataToCacheVerify;
     m_decrementNumberOfMessages;
     o_checkForCompletion;
diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm
index 8f9ce2360..1d18fd489 100644
--- a/src/mem/protocol/MOESI_hammer-dir.sm
+++ b/src/mem/protocol/MOESI_hammer-dir.sm
@@ -38,7 +38,8 @@ machine(Directory, "AMD Hammer-like protocol")
   CacheMemory * probeFilter,
   MemoryControl * memBuffer,
   int memory_controller_latency = 2,
-  bool probe_filter_enabled = false
+  bool probe_filter_enabled = false,
+  bool full_bit_dir_enabled = false
 {
 
   MessageBuffer forwardFromDir, network="To", virtual_network="3", ordered="false";
@@ -140,6 +141,7 @@ machine(Directory, "AMD Hammer-like protocol")
     State PfState, desc="Directory state";
     MachineID Owner, desc="Owner node";
     DataBlock DataBlk, desc="data for the block";
+    Set Sharers, desc="sharing vector for full bit directory";
   }
 
   // TBE entries for DMA requests
@@ -148,6 +150,7 @@ machine(Directory, "AMD Hammer-like protocol")
     State TBEState, desc="Transient State";
     CoherenceResponseType ResponseType, desc="The type for the subsequent response message";
     int Acks, default="0", desc="The number of acks that the waiting response represents";
+    int SilentAcks, default="0", desc="The number of silent acks associated with this transaction";
     DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory";
     DataBlock DataBlk, desc="The current view of system memory";
     int Len, desc="...";
@@ -173,6 +176,8 @@ machine(Directory, "AMD Hammer-like protocol")
 
   // ** OBJECTS **
 
+  Set fwd_set;
+
   TBETable TBEs, template_hack="<Directory_TBE>";
 
   Entry getDirectoryEntry(Address addr), return_by_ref="yes" {
@@ -191,7 +196,7 @@ machine(Directory, "AMD Hammer-like protocol")
     if (is_valid(tbe)) {
       return tbe.TBEState;
     } else {
-      if (probe_filter_enabled) {
+      if (probe_filter_enabled || full_bit_dir_enabled) {
         if (is_valid(pf_entry)) {
           assert(pf_entry.PfState == getDirectoryEntry(addr).DirectoryState);
         } else {
@@ -206,7 +211,7 @@ machine(Directory, "AMD Hammer-like protocol")
     if (is_valid(tbe)) {
       tbe.TBEState := state;
     }
-    if (probe_filter_enabled) {
+    if (probe_filter_enabled || full_bit_dir_enabled) {
       if (is_valid(pf_entry)) {
         pf_entry.PfState := state;
       }
@@ -349,7 +354,7 @@ machine(Directory, "AMD Hammer-like protocol")
         if (in_msg.Type == CoherenceRequestType:PUT) {
           trigger(Event:PUT, in_msg.Address, pf_entry, tbe);
         } else {
-          if (probe_filter_enabled) {
+          if (probe_filter_enabled || full_bit_dir_enabled) {
             if (is_valid(pf_entry)) {
               trigger(cache_request_to_event(in_msg.Type), in_msg.Address,
                       pf_entry, tbe);
@@ -392,26 +397,44 @@ machine(Directory, "AMD Hammer-like protocol")
   // Actions
 
   action(r_setMRU, "\rr", desc="manually set the MRU bit for pf entry" ) {
-    if (probe_filter_enabled) {
+    if (probe_filter_enabled || full_bit_dir_enabled) {
       assert(is_valid(cache_entry));
       probeFilter.setMRU(address);
     }
   }
 
   action(auno_assertUnblockerNotOwner, "auno", desc="assert unblocker not owner") {
-    if (probe_filter_enabled) {
+    if (probe_filter_enabled || full_bit_dir_enabled) {
       assert(is_valid(cache_entry));
       peek(unblockNetwork_in, ResponseMsg) {
         assert(cache_entry.Owner != in_msg.Sender);
+        if (full_bit_dir_enabled) {
+          assert(cache_entry.Sharers.isElement(machineIDToNodeID(in_msg.Sender)) == false);
+        }
       }
     }
   }
 
   action(uo_updateOwnerIfPf, "uo", desc="update owner") {
-    if (probe_filter_enabled) {
+    if (probe_filter_enabled || full_bit_dir_enabled) {
       assert(is_valid(cache_entry));
       peek(unblockNetwork_in, ResponseMsg) {
         cache_entry.Owner := in_msg.Sender;
+        if (full_bit_dir_enabled) {
+          cache_entry.Sharers.clear();
+          cache_entry.Sharers.add(machineIDToNodeID(in_msg.Sender));
+          APPEND_TRANSITION_COMMENT(cache_entry.Sharers);
+          DPRINTF(RubySlicc, "Sharers = %d\n", cache_entry.Sharers);
+        }
       }
     }
   }
 
+  action(us_updateSharerIfFBD, "us", desc="update sharer if full-bit directory") {
+    if (full_bit_dir_enabled) {
+      assert(probeFilter.isTagPresent(address));
+      peek(unblockNetwork_in, ResponseMsg) {
+        cache_entry.Sharers.add(machineIDToNodeID(in_msg.Sender));
+      }
+    }
+  }
+
@@ -441,7 +464,7 @@
   }
 
   action(pfa_probeFilterAllocate, "pfa", desc="Allocate ProbeFilterEntry") {
-    if (probe_filter_enabled) {
+    if (probe_filter_enabled || full_bit_dir_enabled) {
       peek(requestQueue_in, RequestMsg) {
         set_cache_entry(probeFilter.allocate(address, new PfEntry));
         cache_entry.Owner := in_msg.Requestor;
       }
     }
 
   action(pfd_probeFilterDeallocate, "pfd", desc="Deallocate ProbeFilterEntry") {
-    if (probe_filter_enabled) {
+    if (probe_filter_enabled || full_bit_dir_enabled) {
       probeFilter.deallocate(address);
       unset_cache_entry();
     }
   }
 
   action(ppfd_possibleProbeFilterDeallocate, "ppfd", desc="Deallocate ProbeFilterEntry") {
-    if (probe_filter_enabled && is_valid(cache_entry)) {
+    if ((probe_filter_enabled || full_bit_dir_enabled) && is_valid(cache_entry)) {
       probeFilter.deallocate(address);
       unset_cache_entry();
     }
@@ -495,7 +518,12 @@ machine(Directory, "AMD Hammer-like protocol")
   action(pa_setPendingMsgsToAll, "pa", desc="set pending msgs to all") {
     assert(is_valid(tbe));
-    tbe.NumPendingMsgs := machineCount(MachineType:L1Cache);
+    if (full_bit_dir_enabled) {
+      assert(is_valid(cache_entry));
+      tbe.NumPendingMsgs := cache_entry.Sharers.count();
+    } else {
+      tbe.NumPendingMsgs := machineCount(MachineType:L1Cache);
+    }
   }
 
   action(po_setPendingMsgsToOne, "po", desc="set pending msgs to one") {
@@ -510,13 +538,34 @@
   action(sa_setAcksToOne, "sa", desc="Forwarded request, set the ack amount to one") {
     assert(is_valid(tbe));
-    tbe.Acks := 1;
-  }
+    peek(requestQueue_in, RequestMsg) {
+      if (full_bit_dir_enabled) {
+        assert(is_valid(cache_entry));
+        //
+        // If we are using the full-bit directory and no sharers exists beyond
+        // the requestor, then we must set the ack number to all, not one
+        //
+        fwd_set := cache_entry.Sharers;
+        fwd_set.remove(machineIDToNodeID(in_msg.Requestor));
+        if (fwd_set.count() > 0) {
+          tbe.Acks := 1;
+          tbe.SilentAcks := machineCount(MachineType:L1Cache) - fwd_set.count();
+          tbe.SilentAcks := tbe.SilentAcks - 1;
+        } else {
+          tbe.Acks := machineCount(MachineType:L1Cache);
+          tbe.SilentAcks := 0;
+        }
+      } else {
+        tbe.Acks := 1;
+      }
+    }
+  }
 
   action(saa_setAcksToAllIfPF, "saa", desc="Non-forwarded request, set the ack amount to all") {
     assert(is_valid(tbe));
-    if (probe_filter_enabled) {
+    if (probe_filter_enabled || full_bit_dir_enabled) {
       tbe.Acks := machineCount(MachineType:L1Cache);
+      tbe.SilentAcks := 0;
     } else {
       tbe.Acks := 1;
     }
@@ -590,7 +639,7 @@
   }
 
   action(spa_setPendingAcksToZeroIfPF, "spa", desc="if probe filter, no need to wait for acks") {
-    if (probe_filter_enabled) {
+    if (probe_filter_enabled || full_bit_dir_enabled) {
       assert(is_valid(tbe));
       tbe.NumPendingMsgs := 0;
     }
@@ -599,7 +648,7 @@
   action(sc_signalCompletionIfPF, "sc", desc="indicate that we should skip waiting for cpu acks") {
     assert(is_valid(tbe));
     if (tbe.NumPendingMsgs == 0) {
-      assert(probe_filter_enabled);
+      assert(probe_filter_enabled || full_bit_dir_enabled);
       enqueue(triggerQueue_out, TriggerMsg) {
         out_msg.Address := address;
         out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS;
@@ -619,6 +668,7 @@
         DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk);
         out_msg.Dirty := false; // By definition, the block is now clean
         out_msg.Acks := tbe.Acks;
+        out_msg.SilentAcks := tbe.SilentAcks;
         DPRINTF(RubySlicc, "%d\n", out_msg.Acks);
         assert(out_msg.Acks > 0);
         out_msg.MessageSize := MessageSizeType:Response_Data;
@@ -683,7 +733,17 @@
   action(r_recordDataInTBE, "rt", desc="Record Data in TBE") {
     peek(requestQueue_in, RequestMsg) {
       assert(is_valid(tbe));
-      tbe.ResponseType := CoherenceResponseType:DATA;
+      if (full_bit_dir_enabled) {
+        fwd_set := cache_entry.Sharers;
+        fwd_set.remove(machineIDToNodeID(in_msg.Requestor));
+        if (fwd_set.count() > 0) {
+          tbe.ResponseType := CoherenceResponseType:DATA;
+        } else {
+          tbe.ResponseType := CoherenceResponseType:DATA_EXCLUSIVE;
+        }
+      } else {
+        tbe.ResponseType := CoherenceResponseType:DATA;
+      }
     }
   }
@@ -736,16 +796,37 @@ machine(Directory, "AMD Hammer-like protocol")
   action(fn_forwardRequestIfNecessary, "fn", desc="Forward requests if necessary") {
     assert(is_valid(tbe));
     if ((machineCount(MachineType:L1Cache) > 1) && (tbe.Acks <= 1)) {
-      peek(requestQueue_in, RequestMsg) {
-        enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
-          out_msg.Address := address;
-          out_msg.Type := in_msg.Type;
-          out_msg.Requestor := in_msg.Requestor;
-          out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
-          out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
-          out_msg.MessageSize := MessageSizeType:Broadcast_Control;
-          out_msg.InitialRequestTime := in_msg.InitialRequestTime;
-          out_msg.ForwardRequestTime := get_time();
+      if (full_bit_dir_enabled) {
+        assert(is_valid(cache_entry));
+        peek(requestQueue_in, RequestMsg) {
+          fwd_set := cache_entry.Sharers;
+          fwd_set.remove(machineIDToNodeID(in_msg.Requestor));
+          if (fwd_set.count() > 0) {
+            enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+              out_msg.Address := address;
+              out_msg.Type := in_msg.Type;
+              out_msg.Requestor := in_msg.Requestor;
+              out_msg.Destination.setNetDest(MachineType:L1Cache, fwd_set);
+              out_msg.MessageSize := MessageSizeType:Multicast_Control;
+              out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+              out_msg.ForwardRequestTime := get_time();
+              assert(tbe.SilentAcks > 0);
+              out_msg.SilentAcks := tbe.SilentAcks;
+            }
+          }
+        }
+      } else {
+        peek(requestQueue_in, RequestMsg) {
+          enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+            out_msg.Address := address;
+            out_msg.Type := in_msg.Type;
+            out_msg.Requestor := in_msg.Requestor;
+            out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
+            out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
+            out_msg.MessageSize := MessageSizeType:Broadcast_Control;
+            out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+            out_msg.ForwardRequestTime := get_time();
+          }
         }
       }
     }
@@ -753,12 +834,25 @@
   action(ia_invalidateAllRequest, "ia", desc="invalidate all copies") {
     if (machineCount(MachineType:L1Cache) > 1) {
-      enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
-        out_msg.Address := address;
-        out_msg.Type := CoherenceRequestType:INV;
-        out_msg.Requestor := machineID;
-        out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
-        out_msg.MessageSize := MessageSizeType:Broadcast_Control;
+      if (full_bit_dir_enabled) {
+        assert(cache_entry.Sharers.count() > 0);
+        peek(requestQueue_in, RequestMsg) {
+          enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+            out_msg.Address := address;
+            out_msg.Type := CoherenceRequestType:INV;
+            out_msg.Requestor := machineID;
+            out_msg.Destination.setNetDest(MachineType:L1Cache, cache_entry.Sharers);
+            out_msg.MessageSize := MessageSizeType:Multicast_Control;
+          }
+        }
+      } else {
+        enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+          out_msg.Address := address;
+          out_msg.Type := CoherenceRequestType:INV;
+          out_msg.Requestor := machineID;
+          out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
+          out_msg.MessageSize := MessageSizeType:Broadcast_Control;
+        }
       }
     }
   }
@@ -780,15 +874,33 @@
   action(fb_forwardRequestBcast, "fb", desc="Forward requests to all nodes") {
     if (machineCount(MachineType:L1Cache) > 1) {
       peek(requestQueue_in, RequestMsg) {
-        enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
-          out_msg.Address := address;
-          out_msg.Type := in_msg.Type;
-          out_msg.Requestor := in_msg.Requestor;
-          out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
-          out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
-          out_msg.MessageSize := MessageSizeType:Broadcast_Control;
-          out_msg.InitialRequestTime := in_msg.InitialRequestTime;
-          out_msg.ForwardRequestTime := get_time();
+        if (full_bit_dir_enabled) {
+          fwd_set := cache_entry.Sharers;
+          fwd_set.remove(machineIDToNodeID(in_msg.Requestor));
+          if (fwd_set.count() > 0) {
+            enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+              out_msg.Address := address;
+              out_msg.Type := in_msg.Type;
+              out_msg.Requestor := in_msg.Requestor;
+              out_msg.Destination.setNetDest(MachineType:L1Cache, fwd_set);
+              out_msg.MessageSize := MessageSizeType:Multicast_Control;
+              out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+              out_msg.ForwardRequestTime := get_time();
+              out_msg.SilentAcks := machineCount(MachineType:L1Cache) - fwd_set.count();
+              out_msg.SilentAcks := out_msg.SilentAcks - 1;
+            }
+          }
+        } else {
+          enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+            out_msg.Address := address;
+            out_msg.Type := in_msg.Type;
+            out_msg.Requestor := in_msg.Requestor;
+            out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
+            out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
+            out_msg.MessageSize := MessageSizeType:Broadcast_Control;
+            out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+            out_msg.ForwardRequestTime := get_time();
+          }
         }
       }
     }
@@ -820,7 +932,7 @@ machine(Directory, "AMD Hammer-like protocol")
 
   action(fc_forwardRequestConditionalOwner, "fc", desc="Forward request to one or more nodes") {
     assert(machineCount(MachineType:L1Cache) > 1);
-    if (probe_filter_enabled) {
+    if (probe_filter_enabled || full_bit_dir_enabled) {
       peek(requestQueue_in, RequestMsg) {
         enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
           assert(is_valid(cache_entry));
@@ -979,7 +1091,7 @@
   }
 
   action(ano_assertNotOwner, "ano", desc="Assert that request is not current owner") {
-    if (probe_filter_enabled) {
+    if (probe_filter_enabled || full_bit_dir_enabled) {
       peek(requestQueue_in, RequestMsg) {
         assert(is_valid(cache_entry));
         assert(cache_entry.Owner != in_msg.Requestor);
@@ -987,6 +1099,32 @@
     }
   }
 
+  action(ans_assertNotSharer, "ans", desc="Assert that request is not a current sharer") {
+    if (full_bit_dir_enabled) {
+      peek(requestQueue_in, RequestMsg) {
+        assert(cache_entry.Sharers.isElement(machineIDToNodeID(in_msg.Requestor)) == false);
+      }
+    }
+  }
+
+  action(rs_removeSharer, "s", desc="remove current sharer") {
+    if (full_bit_dir_enabled) {
+      peek(unblockNetwork_in, ResponseMsg) {
+        assert(cache_entry.Sharers.isElement(machineIDToNodeID(in_msg.Sender)));
+        cache_entry.Sharers.remove(machineIDToNodeID(in_msg.Sender));
+      }
+    }
+  }
+
+  action(cs_clearSharers, "cs", desc="clear current sharers") {
+    if (full_bit_dir_enabled) {
+      peek(requestQueue_in, RequestMsg) {
+        cache_entry.Sharers.clear();
+        cache_entry.Sharers.add(machineIDToNodeID(in_msg.Requestor));
+      }
+    }
+  }
+
   action(l_queueMemoryWBRequest, "lq", desc="Write PUTX data to memory") {
     peek(unblockNetwork_in, ResponseMsg) {
       enqueue(memQueue_out, MemoryMsg, latency="1") {
@@ -1077,6 +1215,7 @@ machine(Directory, "AMD Hammer-like protocol")
     sa_setAcksToOne;
     qf_queueMemoryFetchRequest;
     fb_forwardRequestBcast;
+    cs_clearSharers;
     i_popIncomingRequestQueue;
   }
@@ -1139,6 +1278,7 @@
   transition(NX, GETX, NO_B) {
     r_setMRU;
     fb_forwardRequestBcast;
+    cs_clearSharers;
     i_popIncomingRequestQueue;
   }
@@ -1147,12 +1287,14 @@
     r_setMRU;
     ano_assertNotOwner;
     fc_forwardRequestConditionalOwner;
+    cs_clearSharers;
     i_popIncomingRequestQueue;
   }
 
   transition(S, GETX, NO_B) {
     r_setMRU;
     fb_forwardRequestBcast;
+    cs_clearSharers;
     i_popIncomingRequestQueue;
   }
@@ -1163,7 +1305,15 @@
     i_popIncomingRequestQueue;
   }
 
-  transition({NX, NO}, GETS, NO_B) {
+  transition(NO, GETS, NO_B) {
+    r_setMRU;
+    ano_assertNotOwner;
+    ans_assertNotSharer;
+    fc_forwardRequestConditionalOwner;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(NX, GETS, NO_B) {
     r_setMRU;
     ano_assertNotOwner;
     fc_forwardRequestConditionalOwner;
     i_popIncomingRequestQueue;
@@ -1211,7 +1361,7 @@
     z_stallAndWaitRequest;
   }
 
-  transition({NO_B, NO_B_S, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
+  transition({NO_B_X, NO_B, NO_B_S, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
               NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W,
               NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R}, {DMA_READ, DMA_WRITE}) {
@@ -1232,17 +1382,20 @@
   // unblock responses
   transition({NO_B, NO_B_X}, UnblockS, NX) {
+    us_updateSharerIfFBD;
     k_wakeUpDependents;
     j_popIncomingUnblockQueue;
   }
 
   transition({NO_B, NO_B_X}, UnblockM, NO) {
     uo_updateOwnerIfPf;
+    us_updateSharerIfFBD;
     k_wakeUpDependents;
     j_popIncomingUnblockQueue;
   }
 
   transition(NO_B_S, UnblockS, NO_B_S_W) {
+    us_updateSharerIfFBD;
     fr_forwardMergeReadRequestsToOwner;
     sp_setPendingMsgsToMergedSharers;
     j_popIncomingUnblockQueue;
@@ -1256,6 +1409,7 @@
   }
 
   transition(NO_B_S_W, UnblockS) {
+    us_updateSharerIfFBD;
     mu_decrementNumberOfUnblocks;
     os_checkForMergedGetSCompletion;
     j_popIncomingUnblockQueue;
@@ -1268,6 +1422,14 @@
   }
 
   transition(O_B, UnblockS, O) {
+    us_updateSharerIfFBD;
+    k_wakeUpDependents;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(O_B, UnblockM, NO) {
+    us_updateSharerIfFBD;
+    uo_updateOwnerIfPf;
     k_wakeUpDependents;
     j_popIncomingUnblockQueue;
   }
@@ -1505,10 +1667,12 @@
   }
 
   transition(NO_B_W, UnblockS, NO_W) {
+    us_updateSharerIfFBD;
     j_popIncomingUnblockQueue;
   }
 
   transition(O_B_W, UnblockS, O_W) {
+    us_updateSharerIfFBD;
     j_popIncomingUnblockQueue;
   }
@@ -1527,12 +1691,14 @@
   // WB State Transistions
   transition(WB, Writeback_Dirty, WB_O_W) {
     l_writeDataToMemory;
+    rs_removeSharer;
     l_queueMemoryWBRequest;
     j_popIncomingUnblockQueue;
   }
 
   transition(WB, Writeback_Exclusive_Dirty, WB_E_W) {
     l_writeDataToMemory;
+    rs_removeSharer;
     l_queueMemoryWBRequest;
     j_popIncomingUnblockQueue;
   }
@@ -1550,18 +1716,20 @@
   transition(WB, Writeback_Clean, O) {
     ll_checkIncomingWriteback;
+    rs_removeSharer;
     k_wakeUpDependents;
     j_popIncomingUnblockQueue;
   }
 
   transition(WB, Writeback_Exclusive_Clean, E) {
     ll_checkIncomingWriteback;
+    rs_removeSharer;
     pfd_probeFilterDeallocate;
     k_wakeUpDependents;
     j_popIncomingUnblockQueue;
   }
 
-  transition(WB, Unblock, NO) {
+  transition(WB, Unblock, NX) {
     auno_assertUnblockerNotOwner;
     k_wakeUpDependents;
     j_popIncomingUnblockQueue;
diff --git a/src/mem/protocol/MOESI_hammer-msg.sm b/src/mem/protocol/MOESI_hammer-msg.sm
index c90c8a53c..063cc91ee 100644
--- a/src/mem/protocol/MOESI_hammer-msg.sm
+++ b/src/mem/protocol/MOESI_hammer-msg.sm
@@ -83,6 +83,7 @@ structure(RequestMsg, desc="...", interface="NetworkMessage") {
   bool DirectedProbe, default="false", desc="probe filter directed probe";
   Time InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
   Time ForwardRequestTime, default="0", desc="time the dir forwarded the request";
+  int SilentAcks, default="0", desc="silent acks from the full-bit directory";
 }
 
 // ResponseMsg (and also unblock requests)
@@ -94,10 +95,11 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") {
   NetDest Destination, desc="Node to whom the data is sent";
   DataBlock DataBlk, desc="data for the cache line";
   bool Dirty, desc="Is the data dirty (different than memory)?";
-  int Acks, desc="How many messages this counts as";
+  int Acks, default="0", desc="How many messages this counts as";
   MessageSizeType MessageSize, desc="size category of the message";
   Time InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
   Time ForwardRequestTime, default="0", desc="time the dir forwarded the request";
+  int SilentAcks, default="0", desc="silent acks from the full-bit directory";
 }
 
 enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") {
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index 574307341..2799be55d 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -173,6 +173,7 @@ enumeration(MessageSizeType, default="MessageSizeType_Undefined", desc="...") {
   Writeback_Data, desc="Writeback data";
   Writeback_Control, desc="Writeback control";
   Broadcast_Control, desc="Broadcast control";
+  Multicast_Control, desc="Multicast control";
   Forwarded_Control, desc="Forwarded control";
   Invalidate_Control, desc="Invalidate control";
   Unblock_Control, desc="Unblock control";
diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc
index 3e866907f..34ee18fea 100644
--- a/src/mem/ruby/network/Network.cc
+++ b/src/mem/ruby/network/Network.cc
@@ -73,6 +73,7 @@ Network::MessageSizeType_to_int(MessageSizeType size_type)
     case MessageSizeType_Response_Control:
     case MessageSizeType_Writeback_Control:
    case MessageSizeType_Broadcast_Control:
+    case MessageSizeType_Multicast_Control:
     case MessageSizeType_Forwarded_Control:
     case MessageSizeType_Invalidate_Control:
     case MessageSizeType_Unblock_Control:
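Taken together, the directory hunks compute how many of the machineCount(MachineType:L1Cache) probes a requester would normally wait for can be skipped because the full-bit Sharers vector shows those nodes are not recorded as sharers, and the L1 hunks apply that SilentAcks discount exactly once per transaction through the AppliedSilentAcks guard. The plain-Python model below restates that arithmetic under a simplified assumption (the requester initially expects machineCount responses; the real TBE setup in MOESI_hammer-cache.sm has more cases), so it is a sketch of the accounting, not a faithful SLICC transcription.

# Simplified model of the SilentAcks bookkeeping introduced by this patch.

def directory_forward(total_l1s, sharers, requestor):
    """Roughly mirrors sa_setAcksToOne / fb_forwardRequestBcast: multicast only
    to the recorded sharers and report how many probes were never sent."""
    fwd_set = set(sharers) - {requestor}
    if fwd_set:
        acks = 1                                    # ack weight of the data response
        silent_acks = total_l1s - len(fwd_set) - 1  # probes skipped by the directory
        return fwd_set, acks, silent_acks
    # No sharer beyond the requestor: the directory answers exclusively and
    # already accounts for every ack (the DATA_EXCLUSIVE path in r_recordDataInTBE).
    return set(), total_l1s, 0

class RequestorTBE:
    """Roughly mirrors the AppliedSilentAcks guard on the L1 side."""
    def __init__(self, total_l1s):
        self.num_pending_msgs = total_l1s   # simplified initial expectation
        self.applied_silent_acks = False

    def on_response(self, acks, silent_acks):
        # The discount rides on every response but is applied at most once.
        if not self.applied_silent_acks:
            self.num_pending_msgs -= silent_acks
            self.applied_silent_acks = True
        self.num_pending_msgs -= acks
        return self.num_pending_msgs == 0   # all acks accounted for?

# Example: 8 L1s, block shared by nodes {2, 5}, node 0 issues a GETX.
dests, dir_acks, silent = directory_forward(8, {2, 5}, requestor=0)
tbe = RequestorTBE(total_l1s=8)
done = tbe.on_response(dir_acks, silent)   # data response from the directory
for _ in dests:                            # one ack per probed sharer
    done = tbe.on_response(1, silent)
print(sorted(dests), silent, done)         # [2, 5] 5 True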