diff options
author | Tony Gutierrez <anthony.gutierrez@amd.com> | 2016-01-19 14:28:22 -0500 |
---|---|---|
committer | Tony Gutierrez <anthony.gutierrez@amd.com> | 2016-01-19 14:28:22 -0500 |
commit | 1a7d3f9fcb76a68540dd948f91413533a383bfde (patch) | |
tree | 867510a147cd095f19499d26b7c02d27de4cae9d /src/mem | |
parent | 28e353e0403ea379d244a418e8dc8ee0b48187cf (diff) | |
download | gem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz |
gpu-compute: AMD's baseline GPU model
Diffstat (limited to 'src/mem')
53 files changed, 25713 insertions, 75 deletions
diff --git a/src/mem/protocol/GPU_RfO-SQC.sm b/src/mem/protocol/GPU_RfO-SQC.sm new file mode 100644 index 000000000..1e5f8df74 --- /dev/null +++ b/src/mem/protocol/GPU_RfO-SQC.sm @@ -0,0 +1,667 @@ +/* + * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * Author: Lisa Hsu + */ + +machine(MachineType:SQC, "GPU SQC (L1 I Cache)") + : Sequencer* sequencer; + CacheMemory * L1cache; + int TCC_select_num_bits; + Cycles issue_latency := 80; // time to send data down to TCC + Cycles l2_hit_latency := 18; + + MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request"; + MessageBuffer * responseFromSQC, network="To", virtual_network="3", vnet_type="response"; + MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock"; + + MessageBuffer * probeToSQC, network="From", virtual_network="1", vnet_type="request"; + MessageBuffer * responseToSQC, network="From", virtual_network="3", vnet_type="response"; + + MessageBuffer * mandatoryQueue; +{ + state_declaration(State, desc="SQC Cache States", default="SQC_State_I") { + I, AccessPermission:Invalid, desc="Invalid"; + S, AccessPermission:Read_Only, desc="Shared"; + + I_S, AccessPermission:Busy, desc="Invalid, issued RdBlkS, have not seen response yet"; + S_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack"; + I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCCdir for canceled WB"; + } + + enumeration(Event, desc="SQC Events") { + // Core initiated + Fetch, desc="Fetch"; + + //TCC initiated + TCC_AckS, desc="TCC Ack to Core Request"; + TCC_AckWB, desc="TCC Ack for WB"; + TCC_NackWB, desc="TCC Nack for WB"; + + // Mem sys initiated + Repl, desc="Replacing block from cache"; + + // Probe Events + PrbInvData, desc="probe, return M data"; + PrbInv, desc="probe, no need for data"; + PrbShrData, desc="probe downgrade, return data"; + } + + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { + DataArrayRead, desc="Read the data array"; + DataArrayWrite, desc="Write the data array"; + TagArrayRead, desc="Read the tag array"; + TagArrayWrite, desc="Write the tag array"; + } + + + structure(Entry, desc="...", interface="AbstractCacheEntry") { +
State CacheState, desc="cache state"; + bool Dirty, desc="Is the data dirty (diff than memory)?"; + DataBlock DataBlk, desc="data for the block"; + bool FromL2, default="false", desc="block just moved from L2"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block, required for concurrent writebacks"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for"; + bool Shared, desc="Victim hit by shared probe"; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + TBETable TBEs, template="<SQC_TBE>", constructor="m_number_of_TBEs"; + int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + + Tick clockEdge(); + Tick cyclesToTicks(Cycles c); + + void set_cache_entry(AbstractCacheEntry b); + void unset_cache_entry(); + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + Cycles curCycle(); + + // Internal functions + Entry getCacheEntry(Addr address), return_by_pointer="yes" { + Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address)); + return cache_entry; + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + return tbe.DataBlk; + } else { + return getCacheEntry(addr).DataBlk; + } + } + + State getState(TBE tbe, Entry cache_entry, Addr addr) { + if(is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:I; + } + + void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState := state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + AccessPermission getAccessPermission(Addr addr) { + TBE tbe := TBEs.lookup(addr); + 
if(is_valid(tbe)) { + return SQC_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + return SQC_State_to_permission(cache_entry.CacheState); + } + + return AccessPermission:NotPresent; + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(SQC_State_to_permission(state)); + } + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + void recordRequestType(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:DataArrayWrite) { + L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:TagArrayRead) { + L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:TagArrayWrite) { + L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:DataArrayWrite) { + return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:TagArrayRead) { + return L1cache.checkResourceAvailable(CacheResourceType:TagArray, 
addr); + } else if (request_type == RequestType:TagArrayWrite) { + return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } + } + + // Out Ports + + out_port(requestNetwork_out, CPURequestMsg, requestFromSQC); + out_port(responseNetwork_out, ResponseMsg, responseFromSQC); + out_port(unblockNetwork_out, UnblockMsg, unblockFromCore); + + // In Ports + + in_port(probeNetwork_in, TDProbeRequestMsg, probeToSQC) { + if (probeNetwork_in.isReady(clockEdge())) { + peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") { + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + + if (in_msg.Type == ProbeRequestType:PrbInv) { + if (in_msg.ReturnData) { + trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) { + assert(in_msg.ReturnData); + trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe); + } + } + } + } + + in_port(responseToSQC_in, ResponseMsg, responseToSQC) { + if (responseToSQC_in.isReady(clockEdge())) { + peek(responseToSQC_in, ResponseMsg, block_on="addr") { + + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + + if (in_msg.Type == CoherenceResponseType:TDSysResp) { + if (in_msg.State == CoherenceState:Shared) { + trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe); + } else { + error("SQC should not receive TDSysResp other than CoherenceState:Shared"); + } + } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) { + trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) { + trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe); + } else { + error("Unexpected Response Message to Core"); + } + } + } + } + + in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") 
{ + if (mandatoryQueue_in.isReady(clockEdge())) { + peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { + Entry cache_entry := getCacheEntry(in_msg.LineAddress); + TBE tbe := TBEs.lookup(in_msg.LineAddress); + + assert(in_msg.Type == RubyRequestType:IFETCH); + if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) { + trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe); + } else { + Addr victim := L1cache.cacheProbe(in_msg.LineAddress); + trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } + } + } + + // Actions + + action(ic_invCache, "ic", desc="invalidate cache") { + if(is_valid(cache_entry)) { + L1cache.deallocate(address); + } + unset_cache_entry(); + } + + action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlkS; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := curCycle(); + } + } + + action(vc_victim, "vc", desc="Victimize E/S Data") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Type := CoherenceRequestType:VicClean; + out_msg.InitialRequestTime := curCycle(); + if (cache_entry.CacheState == State:S) { + out_msg.Shared := true; + } else { + out_msg.Shared := false; + } + out_msg.InitialRequestTime := curCycle(); + } + } + + action(a_allocate, "a", desc="allocate block") { + if (is_invalid(cache_entry)) { + set_cache_entry(L1cache.allocate(address, new Entry)); + } + } + + action(t_allocateTBE, "t", desc="allocate TBE 
Entry") { + check_allocate(TBEs); + assert(is_valid(cache_entry)); + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs + tbe.Dirty := cache_entry.Dirty; + tbe.Shared := false; + } + + action(d_deallocateTBE, "d", desc="Deallocate TBE") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") { + mandatoryQueue_in.dequeue(clockEdge()); + } + + action(pr_popResponseQueue, "pr", desc="Pop Response Queue") { + responseToSQC_in.dequeue(clockEdge()); + } + + action(pp_popProbeQueue, "pp", desc="pop probe queue") { + probeNetwork_in.dequeue(clockEdge()); + } + + action(l_loadDone, "l", desc="local load done") { + assert(is_valid(cache_entry)); + sequencer.readCallback(address, cache_entry.DataBlk, + false, MachineType:L1Cache); + APPEND_TRANSITION_COMMENT(cache_entry.DataBlk); + } + + action(xl_loadDone, "xl", desc="remote load done") { + peek(responseToSQC_in, ResponseMsg) { + assert(is_valid(cache_entry)); + sequencer.readCallback(address, + cache_entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + APPEND_TRANSITION_COMMENT(cache_entry.DataBlk); + } + } + + action(w_writeCache, "w", desc="write data to cache") { + peek(responseToSQC_in, ResponseMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") { + peek(responseToSQC_in, ResponseMsg) { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:StaleNotif; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Response_Control; + 
DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(wb_data, "wb", desc="write back data") { + peek(responseToSQC_in, ResponseMsg) { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUData; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + if (tbe.Shared) { + out_msg.NbReqShared := true; + } else { + out_msg.NbReqShared := false; + } + out_msg.State := CoherenceState:Shared; // faux info + out_msg.MessageSize := MessageSizeType:Writeback_Data; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Dirty := false; + out_msg.Hit := false; + out_msg.Ntsl := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + // will this always be ok? 
probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Dirty := false; + out_msg.Ntsl := true; + out_msg.Hit := false; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Dirty := false; // only true if sending back data i think + out_msg.Hit := false; + out_msg.Ntsl := false; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + assert(is_valid(cache_entry) || is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + // will this always be ok? 
probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.DataBlk := getDataBlock(address); + if (is_valid(tbe)) { + out_msg.Dirty := tbe.Dirty; + } else { + out_msg.Dirty := cache_entry.Dirty; + } + out_msg.Hit := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + assert(is_valid(cache_entry) || is_valid(tbe)); + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.DataBlk := getDataBlock(address); + if (is_valid(tbe)) { + out_msg.Dirty := tbe.Dirty; + } else { + out_msg.Dirty := cache_entry.Dirty; + } + out_msg.Hit := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") { + assert(is_valid(tbe)); + tbe.Shared := true; + } + + action(uu_sendUnblock, "uu", desc="state changed, unblock") { + enqueue(unblockNetwork_out, UnblockMsg, issue_latency) { + out_msg.addr := address; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Unblock_Control; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") { + probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") { + 
mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + // Transitions + + // transitions from base + transition(I, Fetch, I_S) {TagArrayRead, TagArrayWrite} { + a_allocate; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + // simple hit transitions + transition(S, Fetch) {TagArrayRead, DataArrayRead} { + l_loadDone; + p_popMandatoryQueue; + } + + // recycles from transients + transition({I_S, S_I, I_C}, {Fetch, Repl}) {} { + zz_recycleMandatoryQueue; + } + + transition(S, Repl, S_I) {TagArrayRead} { + t_allocateTBE; + vc_victim; + ic_invCache; + } + + // TCC event + transition(I_S, TCC_AckS, S) {DataArrayRead, DataArrayWrite} { + w_writeCache; + xl_loadDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(S_I, TCC_NackWB, I){TagArrayWrite} { + d_deallocateTBE; + pr_popResponseQueue; + } + + transition(S_I, TCC_AckWB, I) {TagArrayWrite} { + wb_data; + d_deallocateTBE; + pr_popResponseQueue; + } + + transition(I_C, TCC_AckWB, I){TagArrayWrite} { + ss_sendStaleNotification; + d_deallocateTBE; + pr_popResponseQueue; + } + + transition(I_C, TCC_NackWB, I) {TagArrayWrite} { + d_deallocateTBE; + pr_popResponseQueue; + } + + // Probe transitions + transition({S, I}, PrbInvData, I) {TagArrayRead, TagArrayWrite} { + pd_sendProbeResponseData; + ic_invCache; + pp_popProbeQueue; + } + + transition(I_C, PrbInvData, I_C) { + pi_sendProbeResponseInv; + ic_invCache; + pp_popProbeQueue; + } + + transition({S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} { + pi_sendProbeResponseInv; + ic_invCache; + pp_popProbeQueue; + } + + transition({S}, PrbShrData, S) {DataArrayRead} { + pd_sendProbeResponseData; + pp_popProbeQueue; + } + + transition({I, I_C}, PrbShrData) {TagArrayRead} { + prm_sendProbeResponseMiss; + pp_popProbeQueue; + } + + transition(I_C, PrbInv, I_C){ + pi_sendProbeResponseInv; + ic_invCache; + pp_popProbeQueue; + } + + transition(I_S, {PrbInv, PrbInvData}) {} { + pi_sendProbeResponseInv; + ic_invCache; + a_allocate; // but make sure 
there is room for incoming data when it arrives + pp_popProbeQueue; + } + + transition(I_S, PrbShrData) {} { + prm_sendProbeResponseMiss; + pp_popProbeQueue; + } + + transition(S_I, PrbInvData, I_C) {TagArrayWrite} { + pi_sendProbeResponseInv; + ic_invCache; + pp_popProbeQueue; + } + + transition(S_I, PrbInv, I_C) {TagArrayWrite} { + pi_sendProbeResponseInv; + ic_invCache; + pp_popProbeQueue; + } + + transition(S_I, PrbShrData) {DataArrayRead} { + pd_sendProbeResponseData; + sf_setSharedFlip; + pp_popProbeQueue; + } +} diff --git a/src/mem/protocol/GPU_RfO-TCC.sm b/src/mem/protocol/GPU_RfO-TCC.sm new file mode 100644 index 000000000..cfddb3f00 --- /dev/null +++ b/src/mem/protocol/GPU_RfO-TCC.sm @@ -0,0 +1,1199 @@ +/* + * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Lisa Hsu + */ + +machine(MachineType:TCC, "TCC Cache") + : CacheMemory * L2cache; + WireBuffer * w_reqToTCCDir; + WireBuffer * w_respToTCCDir; + WireBuffer * w_TCCUnblockToTCCDir; + WireBuffer * w_reqToTCC; + WireBuffer * w_probeToTCC; + WireBuffer * w_respToTCC; + int TCC_select_num_bits; + Cycles l2_request_latency := 1; + Cycles l2_response_latency := 20; + + // To the general response network + MessageBuffer * responseFromTCC, network="To", virtual_network="3", vnet_type="response"; + + // From the general response network + MessageBuffer * responseToTCC, network="From", virtual_network="3", vnet_type="response"; + +{ + // EVENTS + enumeration(Event, desc="TCC Events") { + // Requests coming from the Cores + RdBlk, desc="CPU RdBlk event"; + RdBlkM, desc="CPU RdBlkM event"; + RdBlkS, desc="CPU RdBlkS event"; + CtoD, desc="Change to Dirty request"; + WrVicBlk, desc="L1 Victim (dirty)"; + WrVicBlkShared, desc="L1 Victim (dirty)"; + ClVicBlk, desc="L1 Victim (clean)"; + ClVicBlkShared, desc="L1 Victim (clean)"; + + CPUData, desc="WB data from CPU"; + CPUDataShared, desc="WB data from CPU, NBReqShared 1"; + StaleWB, desc="Stale WB, No data"; + + L2_Repl, desc="L2 Replacement"; + + // Probes + PrbInvData, desc="Invalidating probe, return dirty data"; + PrbInv, desc="Invalidating probe, no need to return data"; + PrbShrData, desc="Downgrading probe, return data"; + + // Coming from Memory Controller + WBAck, desc="ack 
from memory"; + + CancelWB, desc="Cancel WB from L2"; + } + + // STATES + state_declaration(State, desc="TCC State", default="TCC_State_I") { + M, AccessPermission:Read_Write, desc="Modified"; // No other cache has copy, memory stale + O, AccessPermission:Read_Only, desc="Owned"; // Correct most recent copy, others may exist in S + E, AccessPermission:Read_Write, desc="Exclusive"; // Correct, most recent, and only copy (and == Memory) + S, AccessPermission:Read_Only, desc="Shared"; // Correct, most recent. If no one in O, then == Memory + I, AccessPermission:Invalid, desc="Invalid"; + + I_M, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data"; + I_O, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data"; + I_E, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data"; + I_S, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data"; + S_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to M"; + S_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O"; + S_E, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to E"; + S_S, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to S"; + E_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O"; + E_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O"; + E_E, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O"; + E_S, AccessPermission:Busy, desc="Shared, received WrVicBlk, sent Ack, waiting for Data"; + O_M, AccessPermission:Busy, desc="..."; + O_O, AccessPermission:Busy, desc="..."; + O_E, AccessPermission:Busy, desc="..."; + M_M, AccessPermission:Busy, desc="..."; + M_O, AccessPermission:Busy, desc="..."; + M_E, 
+ AccessPermission:Busy, desc="..."; + M_S, AccessPermission:Busy, desc="..."; + D_I, AccessPermission:Invalid, desc="drop WB data on the floor when receive"; + MOD_I, AccessPermission:Busy, desc="drop WB data on the floor, waiting for WBAck from Mem"; + MO_I, AccessPermission:Busy, desc="M or O, received L2_Repl, waiting for WBAck from Mem"; + ES_I, AccessPermission:Busy, desc="E or S, received L2_Repl, waiting for WBAck from Mem"; + I_C, AccessPermission:Invalid, desc="sent cancel, just waiting to receive mem wb ack so nothing gets confused"; + } + + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { + DataArrayRead, desc="Read the data array"; + DataArrayWrite, desc="Write the data array"; + TagArrayRead, desc="Read the tag array"; + TagArrayWrite, desc="Write the tag array"; + } + + + // STRUCTURES + + structure(Entry, desc="...", interface="AbstractCacheEntry") { + State CacheState, desc="cache state"; + bool Dirty, desc="Is the data dirty (diff from memory?)"; + DataBlock DataBlk, desc="Data for the block"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block"; + bool Dirty, desc="Is the data dirty?"; + bool Shared, desc="Victim hit by shared probe"; + MachineID From, desc="Waiting for writeback from..."; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs"; + int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + + void set_cache_entry(AbstractCacheEntry b); + void unset_cache_entry(); + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + + + // FUNCTION DEFINITIONS + Tick clockEdge(); + Tick cyclesToTicks(Cycles c); + + Entry getCacheEntry(Addr addr), return_by_pointer="yes" { + return static_cast(Entry, "pointer", 
L2cache.lookup(addr)); + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + return getCacheEntry(addr).DataBlk; + } + + bool presentOrAvail(Addr addr) { + return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr); + } + + State getState(TBE tbe, Entry cache_entry, Addr addr) { + if (is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:I; + } + + void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState := state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + AccessPermission getAccessPermission(Addr addr) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + return TCC_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + return TCC_State_to_permission(cache_entry.CacheState); + } + + return AccessPermission:NotPresent; + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(TCC_State_to_permission(state)); + } + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + void recordRequestType(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:DataArrayWrite) { + 
L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:TagArrayRead) { + L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:TagArrayWrite) { + L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:DataArrayWrite) { + return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:TagArrayRead) { + return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:TagArrayWrite) { + return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } + } + + + + // OUT PORTS + out_port(w_requestNetwork_out, CPURequestMsg, w_reqToTCCDir); + out_port(w_TCCResp_out, ResponseMsg, w_respToTCCDir); + out_port(responseNetwork_out, ResponseMsg, responseFromTCC); + out_port(w_unblockNetwork_out, UnblockMsg, w_TCCUnblockToTCCDir); + + // IN PORTS + in_port(TDResponse_in, ResponseMsg, w_respToTCC) { + if (TDResponse_in.isReady(clockEdge())) { + peek(TDResponse_in, ResponseMsg) { + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + if (in_msg.Type == CoherenceResponseType:TDSysWBAck) { + trigger(Event:WBAck, in_msg.addr, cache_entry, tbe); + } + else { + DPRINTF(RubySlicc, "%s\n", in_msg); + error("Error on TDResponse Type"); + } + } + } + } + + // Response Network + in_port(responseNetwork_in, ResponseMsg, responseToTCC) { + if (responseNetwork_in.isReady(clockEdge())) { + peek(responseNetwork_in, ResponseMsg) { + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := 
TBEs.lookup(in_msg.addr); + if (in_msg.Type == CoherenceResponseType:CPUData) { + // NbReqShared selects the shared flavour of the data event + if (in_msg.NbReqShared) { + trigger(Event:CPUDataShared, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:CPUData, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceResponseType:StaleNotif) { + trigger(Event:StaleWB, in_msg.addr, cache_entry, tbe); + } else { + DPRINTF(RubySlicc, "%s\n", in_msg); + error("Error on Response Type"); + } + } + } + } + + // probe network: PrbInv maps to PrbInvData/PrbInv depending on ReturnData, + // PrbDowngrade with ReturnData maps to PrbShrData; any other probe is reported + // as an error instead of being left un-triggered at the head of the queue + in_port(probeNetwork_in, TDProbeRequestMsg, w_probeToTCC) { + if (probeNetwork_in.isReady(clockEdge())) { + peek(probeNetwork_in, TDProbeRequestMsg) { + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + if (in_msg.Type == ProbeRequestType:PrbInv) { + if (in_msg.ReturnData) { + trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) { + if (in_msg.ReturnData) { + trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe); + } else { + error("Don't think I should get any of these"); + } + } else { + DPRINTF(RubySlicc, "%s\n", in_msg); + error("Error on Probe Type"); + } + } + } + } + + // Request Network + in_port(requestNetwork_in, CPURequestMsg, w_reqToTCC) { + if (requestNetwork_in.isReady(clockEdge())) { + peek(requestNetwork_in, CPURequestMsg) { + assert(in_msg.Destination.isElement(machineID)); + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + if (in_msg.Type == CoherenceRequestType:RdBlk) { + trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkS) { + trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { + trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:VicClean) { + if (presentOrAvail(in_msg.addr)) { + if (in_msg.Shared) { + trigger(Event:ClVicBlkShared, in_msg.addr, cache_entry, tbe);
+ } else { + trigger(Event:ClVicBlk, in_msg.addr, cache_entry, tbe); + } + } else { + Addr victim := L2cache.cacheProbe(in_msg.addr); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } else if (in_msg.Type == CoherenceRequestType:VicDirty) { + if (presentOrAvail(in_msg.addr)) { + if (in_msg.Shared) { + trigger(Event:WrVicBlkShared, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe); + } + } else { + Addr victim := L2cache.cacheProbe(in_msg.addr); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } else { + requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + } + } + } + + // BEGIN ACTIONS + + action(i_invL2, "i", desc="invalidate TCC cache block") { + if (is_valid(cache_entry)) { + L2cache.deallocate(address); + } + unset_cache_entry(); + } + + action(rm_sendResponseM, "rm", desc="send Modified response") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:TDSysResp; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := cache_entry.Dirty; + out_msg.State := CoherenceState:Modified; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(rs_sendResponseS, "rs", desc="send Shared response") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:TDSysResp; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := cache_entry.Dirty; + out_msg.State := CoherenceState:Shared; + DPRINTF(RubySlicc, "%s\n", 
out_msg); + } + } + } + + + action(r_requestToTD, "r", desc="Miss in L2, pass on") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := in_msg.Type; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Shared := false; // unneeded for this request + out_msg.MessageSize := in_msg.MessageSize; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(t_allocateTBE, "t", desc="allocate TBE Entry") { + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + if (is_valid(cache_entry)) { + tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs + tbe.Dirty := cache_entry.Dirty; + } + tbe.From := machineID; + } + + action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(vc_vicClean, "vc", desc="Victimize Clean L2 data") { + enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:VicClean; + out_msg.Requestor := machineID; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(vd_vicDirty, "vd", desc="Victimize dirty L2 data") { + enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:VicDirty; + out_msg.Requestor := machineID; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(w_sendResponseWBAck, "w", desc="send WB Ack") { + peek(requestNetwork_in, CPURequestMsg) { + 
enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:TDSysWBAck; + out_msg.Destination.add(in_msg.Requestor); + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") { + enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes + out_msg.Sender := machineID; + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Dirty := false; + out_msg.Hit := false; + out_msg.Ntsl := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(ph_sendProbeResponseHit, "ph", desc="send probe ack, no data") { + enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes + out_msg.Sender := machineID; + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Dirty := false; + out_msg.Hit := true; + out_msg.Ntsl := false; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(pm_sendProbeResponseMiss, "pm", desc="send probe ack, no data") { + enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes + out_msg.Sender := machineID; + // will this always be ok? 
probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Dirty := false; + out_msg.Hit := false; + out_msg.Ntsl := false; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") { + enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes + out_msg.Sender := machineID; + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.DataBlk := cache_entry.DataBlk; + //assert(cache_entry.Dirty); Not needed in TCC where TCC can supply clean data + out_msg.Dirty := cache_entry.Dirty; + out_msg.Hit := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") { + enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.DataBlk := tbe.DataBlk; + //assert(tbe.Dirty); + out_msg.Dirty := tbe.Dirty; + out_msg.Hit := true; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.State := CoherenceState:NA; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(mc_cancelMemWriteback, "mc", desc="send writeback cancel to memory") { + enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:WrCancel; + out_msg.Requestor := machineID; + 
out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Request_Control; + } + } + + action(a_allocateBlock, "a", desc="allocate TCC block") { + if (is_invalid(cache_entry)) { + set_cache_entry(L2cache.allocate(address, new Entry)); + } + } + + action(d_writeData, "d", desc="write data to TCC") { + peek(responseNetwork_in, ResponseMsg) { + if (in_msg.Dirty) { + cache_entry.Dirty := in_msg.Dirty; + } + cache_entry.DataBlk := in_msg.DataBlk; + DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg); + } + } + + action(rd_copyDataFromRequest, "rd", desc="write data to TCC") { + peek(requestNetwork_in, CPURequestMsg) { + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := true; + } + } + + action(f_setFrom, "f", desc="set who WB is expected to come from") { + peek(requestNetwork_in, CPURequestMsg) { + tbe.From := in_msg.Requestor; + } + } + + action(rf_resetFrom, "rf", desc="reset From") { + tbe.From := machineID; + } + + action(wb_data, "wb", desc="write back data") { + enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUData; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + if (tbe.Shared) { + out_msg.NbReqShared := true; + } else { + out_msg.NbReqShared := false; + } + out_msg.State := CoherenceState:Shared; // faux info + out_msg.MessageSize := MessageSizeType:Writeback_Data; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(wt_writeDataToTBE, "wt", desc="write WB data to TBE") { + peek(responseNetwork_in, ResponseMsg) { + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty; + } + } + + action(uo_sendUnblockOwner, "uo", desc="state changed to E, M, or O, unblock") { + enqueue(w_unblockNetwork_out, UnblockMsg, 
l2_request_latency) { + out_msg.addr := address; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Unblock_Control; + out_msg.currentOwner := true; + out_msg.valid := true; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(us_sendUnblockSharer, "us", desc="state changed to S , unblock") { + enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Unblock_Control; + out_msg.currentOwner := false; + out_msg.valid := true; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(un_sendUnblockNotValid, "un", desc="state changed toI, unblock") { + enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Unblock_Control; + out_msg.currentOwner := false; + out_msg.valid := false; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(ut_updateTag, "ut", desc="update Tag (i.e. 
set MRU)") { + L2cache.setMRU(address); + } + + action(p_popRequestQueue, "p", desc="pop request queue") { + requestNetwork_in.dequeue(clockEdge()); + } + + action(pr_popResponseQueue, "pr", desc="pop response queue") { + responseNetwork_in.dequeue(clockEdge()); + } + + action(pn_popTDResponseQueue, "pn", desc="pop TD response queue") { + TDResponse_in.dequeue(clockEdge()); + } + + action(pp_popProbeQueue, "pp", desc="pop probe queue") { + probeNetwork_in.dequeue(clockEdge()); + } + + action(zz_recycleRequestQueue, "\z", desc="recycle request queue") { + requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + + // END ACTIONS + + // BEGIN TRANSITIONS + + // transitions from base + + transition({I, I_C}, {RdBlk, RdBlkS, RdBlkM, CtoD}){TagArrayRead} { + // TCCdir already knows that the block is not here. This is to allocate and get the block. + r_requestToTD; + p_popRequestQueue; + } + +// check + transition({M, O}, RdBlk, O){TagArrayRead, TagArrayWrite} { + rs_sendResponseS; + ut_updateTag; + // detect 2nd chancing + p_popRequestQueue; + } + +//check + transition({E, S}, RdBlk, S){TagArrayRead, TagArrayWrite} { + rs_sendResponseS; + ut_updateTag; + // detect 2nd chancing + p_popRequestQueue; + } + +// check + transition({M, O}, RdBlkS, O){TagArrayRead, TagArrayWrite} { + rs_sendResponseS; + ut_updateTag; + // detect 2nd chance sharing + p_popRequestQueue; + } + +//check + transition({E, S}, RdBlkS, S){TagArrayRead, TagArrayWrite} { + rs_sendResponseS; + ut_updateTag; + // detect 2nd chance sharing + p_popRequestQueue; + } + +// check + transition(M, RdBlkM, I){TagArrayRead, TagArrayWrite} { + rm_sendResponseM; + i_invL2; + p_popRequestQueue; + } + + //check + transition(E, RdBlkM, I){TagArrayRead, TagArrayWrite} { + rm_sendResponseM; + i_invL2; + p_popRequestQueue; + } + +// check + transition({I}, WrVicBlk, I_M){TagArrayRead} { + a_allocateBlock; + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + + 
transition(I_C, {WrVicBlk, WrVicBlkShared, ClVicBlk, ClVicBlkShared}) { + zz_recycleRequestQueue; + } + +//check + transition({I}, WrVicBlkShared, I_O) {TagArrayRead}{ + a_allocateBlock; + t_allocateTBE; + f_setFrom; +// rd_copyDataFromRequest; + w_sendResponseWBAck; + p_popRequestQueue; + } + +//check + transition(S, WrVicBlkShared, S_O){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(S, WrVicBlk, S_S){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(E, WrVicBlk, E_E){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(E, WrVicBlkShared, E_E){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(O, WrVicBlk, O_O){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(O, WrVicBlkShared, O_O){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(M, WrVicBlk, M_M){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(M, WrVicBlkShared, M_O){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +//check + transition({I}, ClVicBlk, I_E){TagArrayRead} { + t_allocateTBE; + f_setFrom; + a_allocateBlock; + w_sendResponseWBAck; + p_popRequestQueue; + } + + transition({I}, ClVicBlkShared, I_S){TagArrayRead} { + t_allocateTBE; + f_setFrom; + a_allocateBlock; + w_sendResponseWBAck; + p_popRequestQueue; + } + +//check + transition(S, ClVicBlkShared, S_S){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(E, ClVicBlk, 
E_E){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(E, ClVicBlkShared, E_S){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(O, ClVicBlk, O_O){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// check. Original L3 had it going from O to O_S. Something can go from O to S only on writeback. + transition(O, ClVicBlkShared, O_O){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(M, ClVicBlk, M_E){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// a stale writeback + transition(M, ClVicBlkShared, M_S){TagArrayRead} { + t_allocateTBE; + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + + + transition({MO_I}, {RdBlk, RdBlkS, RdBlkM, CtoD}) { + a_allocateBlock; + t_allocateTBE; + f_setFrom; + r_requestToTD; + p_popRequestQueue; + } + + transition(MO_I, {WrVicBlkShared, WrVicBlk, ClVicBlk, ClVicBlkShared}, MOD_I) { + f_setFrom; + w_sendResponseWBAck; + p_popRequestQueue; + } + +// d_writeData fills the block, so the data array write is declared here as in the sibling fill transitions below + transition(I_M, CPUData, M){TagArrayWrite, DataArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + d_writeData; + pr_popResponseQueue; + } + + transition(I_M, CPUDataShared, O){TagArrayWrite, DataArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + d_writeData; + pr_popResponseQueue; + } + + transition(I_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + d_writeData; + pr_popResponseQueue; + } + + transition(I_E, CPUData, E){TagArrayWrite, DataArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + d_writeData; + pr_popResponseQueue; + } + + transition(I_E, CPUDataShared, S){TagArrayWrite, DataArrayWrite} { + us_sendUnblockSharer; + dt_deallocateTBE; + d_writeData; + pr_popResponseQueue; + } +
+ transition(I_S, {CPUData, CPUDataShared}, S){TagArrayWrite, DataArrayWrite} { + us_sendUnblockSharer; + dt_deallocateTBE; + d_writeData; + pr_popResponseQueue; + } + + transition(S_M, CPUDataShared, O){TagArrayWrite, DataArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + d_writeData; + ut_updateTag; // update tag on writeback hits. + pr_popResponseQueue; + } + + transition(S_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + d_writeData; + ut_updateTag; // update tag on writeback hits. + pr_popResponseQueue; + } + + transition(S_E, CPUDataShared, S){TagArrayWrite, DataArrayWrite} { + us_sendUnblockSharer; + dt_deallocateTBE; + d_writeData; + ut_updateTag; // update tag on writeback hits. + pr_popResponseQueue; + } + + transition(S_S, {CPUData, CPUDataShared}, S){TagArrayWrite, DataArrayWrite} { + us_sendUnblockSharer; + dt_deallocateTBE; + d_writeData; + ut_updateTag; // update tag on writeback hits. + pr_popResponseQueue; + } + + transition(O_E, CPUDataShared, O){TagArrayWrite, DataArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + d_writeData; + ut_updateTag; // update tag on writeback hits. + pr_popResponseQueue; + } + + transition(O_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + d_writeData; + ut_updateTag; // update tag on writeback hits. 
+ pr_popResponseQueue; + } + + transition({D_I}, {CPUData, CPUDataShared}, I){TagArrayWrite} { + un_sendUnblockNotValid; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition(MOD_I, {CPUData, CPUDataShared}, MO_I) { + un_sendUnblockNotValid; + rf_resetFrom; + pr_popResponseQueue; + } + + transition({O,S,I}, CPUData) { + pr_popResponseQueue; + } + + transition({M, O}, L2_Repl, MO_I){TagArrayRead, DataArrayRead} { + t_allocateTBE; + vd_vicDirty; + i_invL2; + } + + transition({E, S,}, L2_Repl, ES_I){TagArrayRead, DataArrayRead} { + t_allocateTBE; + vc_vicClean; + i_invL2; + } + + transition({I_M, I_O, S_M, S_O, E_M, E_O}, L2_Repl) { + zz_recycleRequestQueue; + } + + transition({O_M, O_O, O_E, M_M, M_O, M_E, M_S}, L2_Repl) { + zz_recycleRequestQueue; + } + + transition({I_E, I_S, S_E, S_S, E_E, E_S}, L2_Repl) { + zz_recycleRequestQueue; + } + + transition({M, O}, PrbInvData, I){TagArrayRead, TagArrayWrite} { + pd_sendProbeResponseData; + i_invL2; + pp_popProbeQueue; + } + + transition(I, PrbInvData){TagArrayRead, TagArrayWrite} { + pi_sendProbeResponseInv; + pp_popProbeQueue; + } + + transition({E, S}, PrbInvData, I){TagArrayRead, TagArrayWrite} { + pd_sendProbeResponseData; + i_invL2; + pp_popProbeQueue; + } + + transition({M, O, E, S, I}, PrbInv, I){TagArrayRead, TagArrayWrite} { + pi_sendProbeResponseInv; + i_invL2; // nothing will happen in I + pp_popProbeQueue; + } + + transition({M, O}, PrbShrData, O){TagArrayRead, TagArrayWrite} { + pd_sendProbeResponseData; + pp_popProbeQueue; + } + + transition({E, S}, PrbShrData, S){TagArrayRead, TagArrayWrite} { + pd_sendProbeResponseData; + pp_popProbeQueue; + } + + transition(I, PrbShrData){TagArrayRead} { + pm_sendProbeResponseMiss; + pp_popProbeQueue; + } + + transition(MO_I, PrbInvData, I_C) { + pdt_sendProbeResponseDataFromTBE; + pp_popProbeQueue; + } + + transition(ES_I, PrbInvData, I_C) { + pi_sendProbeResponseInv; + pp_popProbeQueue; + } + + transition({ES_I,MO_I}, PrbInv, I_C) { + pi_sendProbeResponseInv; + 
pp_popProbeQueue; + } + + transition({ES_I, MO_I}, PrbShrData) { + pdt_sendProbeResponseDataFromTBE; + pp_popProbeQueue; + } + + transition(I_C, {PrbInvData, PrbInv}) { + pi_sendProbeResponseInv; + pp_popProbeQueue; + } + + transition(I_C, PrbShrData) { + pm_sendProbeResponseMiss; + pp_popProbeQueue; + } + + transition(MOD_I, WBAck, D_I) { + pn_popTDResponseQueue; + } + + transition(MO_I, WBAck, I){TagArrayWrite} { + dt_deallocateTBE; + pn_popTDResponseQueue; + } + + // this can only be a spurious CPUData from a shared block. + transition(MO_I, CPUData) { + pr_popResponseQueue; + } + + transition(ES_I, WBAck, I){TagArrayWrite} { + dt_deallocateTBE; + pn_popTDResponseQueue; + } + + transition(I_C, {WBAck}, I){TagArrayWrite} { + dt_deallocateTBE; + pn_popTDResponseQueue; + } + + transition({I_M, I_O, I_E, I_S}, StaleWB, I){TagArrayWrite} { + un_sendUnblockNotValid; + dt_deallocateTBE; + i_invL2; + pr_popResponseQueue; + } + + transition({S_S, S_O, S_M, S_E}, StaleWB, S){TagArrayWrite} { + us_sendUnblockSharer; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition({E_M, E_O, E_E, E_S}, StaleWB, E){TagArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition({O_M, O_O, O_E}, StaleWB, O){TagArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition({M_M, M_O, M_E, M_S}, StaleWB, M){TagArrayWrite} { + uo_sendUnblockOwner; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition(D_I, StaleWB, I) {TagArrayWrite}{ + un_sendUnblockNotValid; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition(MOD_I, StaleWB, MO_I) { + un_sendUnblockNotValid; + rf_resetFrom; + pr_popResponseQueue; + } + +} diff --git a/src/mem/protocol/GPU_RfO-TCCdir.sm b/src/mem/protocol/GPU_RfO-TCCdir.sm new file mode 100644 index 000000000..8f58d6ebb --- /dev/null +++ b/src/mem/protocol/GPU_RfO-TCCdir.sm @@ -0,0 +1,2672 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 
+ * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * Author: Mithuna Thottethodi + */ + +machine(MachineType:TCCdir, "AMD read-for-ownership directory for TCC (aka GPU L2)") +: CacheMemory * directory; + // Convention: wire buffers are prefixed with "w_" for clarity + WireBuffer * w_reqToTCCDir; + WireBuffer * w_respToTCCDir; + WireBuffer * w_TCCUnblockToTCCDir; + WireBuffer * w_reqToTCC; + WireBuffer * w_probeToTCC; + WireBuffer * w_respToTCC; + int TCC_select_num_bits; + Cycles response_latency := 5; + Cycles directory_latency := 6; + Cycles issue_latency := 120; + + // From the TCPs or SQCs + MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request"; + MessageBuffer * responseFromTCP, network="From", virtual_network="3", vnet_type="response"; + MessageBuffer * unblockFromTCP, network="From", virtual_network="5", vnet_type="unblock"; + + // To the Cores. TCC deals only with TCPs/SQCs. CP cores do not communicate directly with TCC. + MessageBuffer * probeToCore, network="To", virtual_network="1", vnet_type="request"; + MessageBuffer * responseToCore, network="To", virtual_network="3", vnet_type="response"; + + // From the NB + MessageBuffer * probeFromNB, network="From", virtual_network="0", vnet_type="request"; + MessageBuffer * responseFromNB, network="From", virtual_network="2", vnet_type="response"; + // To the NB + MessageBuffer * requestToNB, network="To", virtual_network="0", vnet_type="request"; + MessageBuffer * responseToNB, network="To", virtual_network="2", vnet_type="response"; + MessageBuffer * unblockToNB, network="To", virtual_network="4", vnet_type="unblock"; + + MessageBuffer * triggerQueue, random="false"; +{ + // STATES + state_declaration(State, desc="Directory states", default="TCCdir_State_I") { + // Base states (every directory state declared here uses AccessPermission:Invalid) + I, AccessPermission:Invalid, desc="Invalid"; + S, AccessPermission:Invalid, desc="Shared"; + E, AccessPermission:Invalid, desc="Exclusive"; + O, AccessPermission:Invalid, desc="Owner"; + M, AccessPermission:Invalid, desc="Modified"; + + CP_I,
AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to invalid"; + B_I, AccessPermission:Invalid, desc="Blocked, need not send data after acks are in, going to invalid"; + CP_O, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to owned"; + CP_S, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to shared"; + CP_OM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to O_M"; + CP_SM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to S_M"; + CP_ISM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to I_M"; + CP_IOM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to I_M"; + CP_OSIW, AccessPermission:Invalid, desc="Blocked, must send data after acks+CancelWB are in, going to I_C"; + + + // Transient states and busy states used for handling side (TCC-facing) interactions + BW_S, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock"; + BW_E, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock"; + BW_O, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock"; + BW_M, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock"; + + // Transient states and busy states used for handling upward (TCP-facing) interactions + I_M, AccessPermission:Invalid, desc="Invalid, issued RdBlkM, have not seen response yet"; + I_ES, AccessPermission:Invalid, desc="Invalid, issued RdBlk, have not seen response yet"; + I_S, AccessPermission:Invalid, desc="Invalid, issued RdBlkS, have not seen response yet"; + BBS_S, AccessPermission:Invalid, desc="Blocked, going from S to S"; + BBO_O, AccessPermission:Invalid, desc="Blocked, going from O to O"; + BBM_M, AccessPermission:Invalid, desc="Blocked, going from M to M, waiting for data to forward"; + BBM_O, AccessPermission:Invalid, desc="Blocked, going from M to O, waiting for data to forward"; + BB_M, 
AccessPermission:Invalid, desc="Blocked, going from M to M, waiting for unblock"; + BB_O, AccessPermission:Invalid, desc="Blocked, going from M to O, waiting for unblock"; + BB_OO, AccessPermission:Invalid, desc="Blocked, going from O to O (adding sharers), waiting for unblock"; + BB_S, AccessPermission:Invalid, desc="Blocked, going to S, waiting for (possible multiple) unblock(s)"; + BBS_M, AccessPermission:Invalid, desc="Blocked, going from S or O to M"; + BBO_M, AccessPermission:Invalid, desc="Blocked, going from S or O to M"; + BBS_UM, AccessPermission:Invalid, desc="Blocked, going from S or O to M via upgrade"; + BBO_UM, AccessPermission:Invalid, desc="Blocked, going from S or O to M via upgrade"; + S_M, AccessPermission:Invalid, desc="Shared, issued CtoD, have not seen response yet"; + O_M, AccessPermission:Invalid, desc="Owned, issued CtoD, have not seen response yet"; + + // Blocked states that resolve once the core unblock arrives + BBB_S, AccessPermission:Invalid, desc="Blocked, going to S after core unblock"; + BBB_M, AccessPermission:Invalid, desc="Blocked, going to M after core unblock"; + BBB_E, AccessPermission:Invalid, desc="Blocked, going to E after core unblock"; + + VES_I, AccessPermission:Invalid, desc="TCC replacement, waiting for clean WB ack"; + VM_I, AccessPermission:Invalid, desc="TCC replacement, waiting for dirty WB ack"; + VO_I, AccessPermission:Invalid, desc="TCC replacement, waiting for dirty WB ack"; + VO_S, AccessPermission:Invalid, desc="TCC owner replacement, waiting for dirty WB ack"; + + ES_I, AccessPermission:Invalid, desc="L1 replacement, waiting for clean WB ack"; + MO_I, AccessPermission:Invalid, desc="L1 replacement, waiting for dirty WB ack"; + + I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from NB for canceled WB"; + I_W, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from NB; canceled WB raced with directory invalidation"; + + // Recall States + BRWD_I, AccessPermission:Invalid, desc="Recalling, waiting for WBAck and Probe Data responses"; +
BRW_I, AccessPermission:Read_Write, desc="Recalling, waiting for WBAck"; + BRD_I, AccessPermission:Invalid, desc="Recalling, waiting for Probe Data responses"; + + } + + // Array accesses a transition can report; recordRequestType and checkResourceAvailable + // translate each value into the matching data-array or tag-array call on the directory + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { + DataArrayRead, desc="Read the data array"; + DataArrayWrite, desc="Write the data array"; + TagArrayRead, desc="Read the tag array"; + TagArrayWrite, desc="Write the tag array"; + } + + + + // EVENTS + enumeration(Event, desc="TCC Directory Events") { + // Upward facing events (TCCdir w.r.t. TCP/SQC and TCC behaves like NBdir behaves with TCP/SQC and L3) + + // Directory Recall + Recall, desc="directory cache is full"; + // CPU requests + CPUWrite, desc="Initial req from core, sent to TCC"; + NoCPUWrite, desc="Initial req from core, but non-exclusive clean data; can be discarded"; + CPUWriteCancel, desc="Initial req from core, sent to TCC"; + + // Requests from the TCPs + RdBlk, desc="RdBlk event"; + RdBlkM, desc="RdBlkM event"; + RdBlkS, desc="RdBlkS event"; + CtoD, desc="Change to Dirty request"; + + // TCC writebacks, as classified by the w_TCCRequest_in port: + // VicDirty - VicDirty from the recorded owner while other sharers remain + // VicDirtyLast - VicDirty from the recorded owner in M, or with no other sharers + // VicClean - VicClean from the only recorded sharer (last copy) + // NoVic - VicClean from a recorded sharer while other sharers remain + // StaleVic - victim from a requestor that is not the recorded owner/sharer + VicDirty, desc="..."; + VicDirtyLast, desc="..."; + VicClean, desc="..."; + NoVic, desc="..."; + StaleVic, desc="..."; + CancelWB, desc="TCC got invalidating probe, canceled WB"; + + // Probe Responses from TCP/SQCs + CPUPrbResp, desc="Probe response from TCP/SQC"; + TCCPrbResp, desc="Probe response from TCC"; + + ProbeAcksComplete, desc="All acks received"; + ProbeAcksCompleteReissue, desc="All acks received, changing CtoD to reissue"; + + CoreUnblock, desc="unblock from TCP/SQC"; + LastCoreUnblock, desc="Last unblock from TCP/SQC"; + TCCUnblock, desc="unblock from TCC (current owner)"; + TCCUnblock_Sharer, desc="unblock from TCC (a sharer, not owner)"; + TCCUnblock_NotValid,desc="unblock from TCC (not valid...caused by stale writebacks)"; + + // Downward facing events + + // NB initiated + NB_AckS, desc="NB Ack to TCC Request"; + NB_AckE, desc="NB Ack to TCC Request"; + NB_AckM, desc="NB 
Ack to TCC Request"; + NB_AckCtoD, desc="NB Ack to TCC Request"; + NB_AckWB, desc="NB Ack for clean WB"; + + + // Incoming Probes from NB + PrbInvData, desc="Invalidating probe, return dirty data"; + PrbInv, desc="Invalidating probe, no need to return data"; + PrbShrData, desc="Downgrading probe, return data"; + } + + + // TYPES + + // Entry for directory + structure(Entry, desc="...", interface='AbstractCacheEntry') { + State CacheState, desc="Cache state (Cache of directory entries)"; + DataBlock DataBlk, desc="data for the block"; + NetDest Sharers, desc="Sharers for this block"; + NetDest Owner, desc="Owner of this block"; + NetDest MergedSharers, desc="Read sharers who are merged on a request"; + int WaitingUnblocks, desc="Number of acks we're waiting for"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="DataBlk"; + bool Dirty, desc="Is the data dirty?"; + MachineID Requestor, desc="requestor"; + int NumPendingAcks, desc="num acks expected"; + MachineID OriginalRequestor, desc="Original Requestor"; + MachineID UntransferredOwner, desc = "Untransferred owner for an upgrade transaction"; + bool UntransferredOwnerExists, desc = "1 if Untransferred owner exists for an upgrade transaction"; + bool Cached, desc="data hit in Cache"; + bool Shared, desc="victim hit by shared probe"; + bool Upgrade, desc="An upgrade request in progress"; + bool CtoD, desc="Saved sysack info"; + CoherenceState CohState, desc="Saved sysack info"; + MessageSizeType MessageSize, desc="Saved sysack info"; + MachineID Sender, desc="sender"; + } + + structure(TBETable, external = "yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + // ** OBJECTS ** + TBETable TBEs, template="<TCCdir_TBE>", constructor="m_number_of_TBEs"; + int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + NetDest TCC_dir_subtree; + NetDest temp; + + Tick clockEdge(); + Tick cyclesToTicks(Cycles 
c); + + void set_cache_entry(AbstractCacheEntry b); + void unset_cache_entry(); + void set_tbe(TBE b); + void unset_tbe(); + + + bool presentOrAvail(Addr addr) { + return directory.isTagPresent(addr) || directory.cacheAvail(addr); + } + + Entry getCacheEntry(Addr addr), return_by_pointer="yes" { + return static_cast(Entry, "pointer", directory.lookup(addr)); + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + return tbe.DataBlk; + } else { + assert(false); + return getCacheEntry(addr).DataBlk; + } + } + + State getState(TBE tbe, Entry cache_entry, Addr addr) { + if(is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:I; + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(TCCdir_State_to_permission(state)); + } + } + + AccessPermission getAccessPermission(Addr addr) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + return TCCdir_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + return TCCdir_State_to_permission(cache_entry.CacheState); + } + + return AccessPermission:NotPresent; + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState := state; + } + + if 
(is_valid(cache_entry)) { + cache_entry.CacheState := state; + + if (state == State:S) { + assert(cache_entry.Owner.count() == 0); + } + + if (state == State:O) { + assert(cache_entry.Owner.count() == 1); + assert(cache_entry.Sharers.isSuperset(cache_entry.Owner) == false); + } + + if (state == State:M) { + assert(cache_entry.Owner.count() == 1); + assert(cache_entry.Sharers.count() == 0); + } + + if (state == State:E) { + assert(cache_entry.Owner.count() == 0); + assert(cache_entry.Sharers.count() == 1); + } + } + } + + + + void recordRequestType(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + directory.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:DataArrayWrite) { + directory.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:TagArrayRead) { + directory.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:TagArrayWrite) { + directory.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + return directory.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:DataArrayWrite) { + return directory.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:TagArrayRead) { + return directory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:TagArrayWrite) { + return directory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } + } + + // ** OUT_PORTS ** + + // Three classes of ports + // Class 1: downward facing network links to NB + out_port(requestToNB_out, CPURequestMsg, requestToNB); + out_port(responseToNB_out, 
ResponseMsg, responseToNB); + out_port(unblockToNB_out, UnblockMsg, unblockToNB); + + + // Class 2: upward facing ports to GPU cores + out_port(probeToCore_out, TDProbeRequestMsg, probeToCore); + out_port(responseToCore_out, ResponseMsg, responseToCore); + + // Class 3: sideward facing ports (on "wirebuffer" links) to TCC + out_port(w_requestTCC_out, CPURequestMsg, w_reqToTCC); + out_port(w_probeTCC_out, NBProbeRequestMsg, w_probeToTCC); + out_port(w_respTCC_out, ResponseMsg, w_respToTCC); + + + // local trigger port + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + + // + // request queue going to NB + // + + // ** IN_PORTS ** + + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=8) { + if (triggerQueue_in.isReady(clockEdge())) { + peek(triggerQueue_in, TriggerMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + assert(is_valid(tbe)); + Entry cache_entry := getCacheEntry(in_msg.addr); + if ((in_msg.Type == TriggerType:AcksComplete) && (tbe.Upgrade == false)) { + trigger(Event:ProbeAcksComplete, in_msg.addr, cache_entry, tbe); + } else if ((in_msg.Type == TriggerType:AcksComplete) && (tbe.Upgrade == true)) { + trigger(Event:ProbeAcksCompleteReissue, in_msg.addr, cache_entry, tbe); + } + } + } + } + + // Unblock Networks (TCCdir can receive unblocks from TCC, TCPs) + // Port on first (of three) wire buffers from TCC + in_port(w_TCCUnblock_in, UnblockMsg, w_TCCUnblockToTCCDir, rank=7) { + if (w_TCCUnblock_in.isReady(clockEdge())) { + peek(w_TCCUnblock_in, UnblockMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (in_msg.currentOwner) { + trigger(Event:TCCUnblock, in_msg.addr, cache_entry, tbe); + } else if (in_msg.valid) { + trigger(Event:TCCUnblock_Sharer, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:TCCUnblock_NotValid, in_msg.addr, cache_entry, tbe); + } + } + } + } + + in_port(unblockNetwork_in, UnblockMsg, unblockFromTCP, rank=6) { + if 
(unblockNetwork_in.isReady(clockEdge())) { + peek(unblockNetwork_in, UnblockMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + // WaitingUnblocks lives in the directory entry, so the entry must exist before it is read + assert(is_valid(cache_entry)); + if(cache_entry.WaitingUnblocks == 1) { + trigger(Event:LastCoreUnblock, in_msg.addr, cache_entry, tbe); + } + else { + trigger(Event:CoreUnblock, in_msg.addr, cache_entry, tbe); + } + } + } + } + + + //Responses from TCC, and Cores + // Port on second (of three) wire buffers from TCC + in_port(w_TCCResponse_in, ResponseMsg, w_respToTCCDir, rank=5) { + if (w_TCCResponse_in.isReady(clockEdge())) { + peek(w_TCCResponse_in, ResponseMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (in_msg.Type == CoherenceResponseType:CPUPrbResp) { + trigger(Event:TCCPrbResp, in_msg.addr, cache_entry, tbe); + } else { + // Only probe responses are handled here; fail loudly instead of leaving + // an unhandled message parked at the head of the queue + error("Unexpected Response Message from TCC"); + } + } + } + } + + in_port(responseNetwork_in, ResponseMsg, responseFromTCP, rank=4) { + if (responseNetwork_in.isReady(clockEdge())) { + peek(responseNetwork_in, ResponseMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (in_msg.Type == CoherenceResponseType:CPUPrbResp) { + trigger(Event:CPUPrbResp, in_msg.addr, cache_entry, tbe); + } else { + // Same policy as the TCC response port: an unhandled type is a protocol error + error("Unexpected Response Message from Core"); + } + } + } + } + + + // Port on third (of three) wire buffers from TCC + in_port(w_TCCRequest_in, CPURequestMsg, w_reqToTCCDir, rank=3) { + if(w_TCCRequest_in.isReady(clockEdge())) { + peek(w_TCCRequest_in, CPURequestMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (in_msg.Type == CoherenceRequestType:WrCancel) { + trigger(Event:CancelWB, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:VicDirty) { + if (is_valid(cache_entry) && cache_entry.Owner.isElement(in_msg.Requestor)) { + // if modified, or owner with no other sharers + if ((cache_entry.CacheState == State:M) || (cache_entry.Sharers.count() == 0)) { + assert(cache_entry.Owner.count()==1); + trigger(Event:VicDirtyLast, in_msg.addr, 
cache_entry, tbe); + } else { + trigger(Event:VicDirty, in_msg.addr, cache_entry, tbe); + } + } else { + trigger(Event:StaleVic, in_msg.addr, cache_entry, tbe); + } + } else { + if (in_msg.Type == CoherenceRequestType:VicClean) { + if (is_valid(cache_entry) && cache_entry.Sharers.isElement(in_msg.Requestor)) { + if (cache_entry.Sharers.count() == 1) { + // Last copy, victimize to L3 + trigger(Event:VicClean, in_msg.addr, cache_entry, tbe); + } else { + // Either not the last copy or stall. No need to victimmize + // remove sharer from sharer list + assert(cache_entry.Sharers.count() > 1); + trigger(Event:NoVic, in_msg.addr, cache_entry, tbe); + } + } else { + trigger(Event:StaleVic, in_msg.addr, cache_entry, tbe); + } + } + } + } + } + } + + in_port(responseFromNB_in, ResponseMsg, responseFromNB, rank=2) { + if (responseFromNB_in.isReady(clockEdge())) { + peek(responseFromNB_in, ResponseMsg, block_on="addr") { + + TBE tbe := TBEs.lookup(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (in_msg.Type == CoherenceResponseType:NBSysResp) { + if (in_msg.State == CoherenceState:Modified) { + if (in_msg.CtoD) { + trigger(Event:NB_AckCtoD, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.State == CoherenceState:Shared) { + trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe); + } else if (in_msg.State == CoherenceState:Exclusive) { + trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) { + trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe); + } else { + error("Unexpected Response Message to Core"); + } + } + } + } + + // Finally handling incoming requests (from TCP) and probes (from NB). 
+ + in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB, rank=1) { + if (probeNetwork_in.isReady(clockEdge())) { + peek(probeNetwork_in, NBProbeRequestMsg) { + DPRINTF(RubySlicc, "%s\n", in_msg); + DPRINTF(RubySlicc, "machineID: %s\n", machineID); + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + + if (in_msg.Type == ProbeRequestType:PrbInv) { + if (in_msg.ReturnData) { + trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) { + assert(in_msg.ReturnData); + trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe); + } + } + } + } + + + in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) { + if (coreRequestNetwork_in.isReady(clockEdge())) { + peek(coreRequestNetwork_in, CPURequestMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (presentOrAvail(in_msg.addr)) { + if (in_msg.Type == CoherenceRequestType:VicDirty) { + trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:VicClean) { + if (is_valid(cache_entry) && cache_entry.Owner.isElement(in_msg.Requestor)) { + trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); + } else if(is_valid(cache_entry) && (cache_entry.Sharers.count() + cache_entry.Owner.count() ) >1) { + trigger(Event:NoCPUWrite, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:RdBlk) { + trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkS) { + trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { + trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:WrCancel) { + 
trigger(Event:CPUWriteCancel, in_msg.addr, cache_entry, tbe); + } + } else { + // All requests require a directory entry + Addr victim := directory.cacheProbe(in_msg.addr); + trigger(Event:Recall, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } + } + } + + + + + // Actions + + //Downward facing actions + + action(c_clearOwner, "c", desc="Clear the owner field") { + cache_entry.Owner.clear(); + } + + action(rS_removeRequesterFromSharers, "rS", desc="Remove unblocker from sharer list") { + peek(unblockNetwork_in, UnblockMsg) { + cache_entry.Sharers.remove(in_msg.Sender); + } + } + + action(rT_removeTCCFromSharers, "rT", desc="Remove TCC from sharer list") { + peek(w_TCCRequest_in, CPURequestMsg) { + cache_entry.Sharers.remove(in_msg.Requestor); + } + } + + // NOTE(review): rO_ and rC_ below have identical bodies; both drop the requestor of the + // message at the head of coreRequestNetwork_in from Sharers + action(rO_removeOriginalRequestorFromSharers, "rO", desc="Remove replacing core from sharer list") { + peek(coreRequestNetwork_in, CPURequestMsg) { + cache_entry.Sharers.remove(in_msg.Requestor); + } + } + + action(rC_removeCoreFromSharers, "rC", desc="Remove replacing core from sharer list") { + peek(coreRequestNetwork_in, CPURequestMsg) { + cache_entry.Sharers.remove(in_msg.Requestor); + } + } + + action(rCo_removeCoreFromOwner, "rCo", desc="Remove replacing core from owner field") { + // Note that under some cases this action will try to remove a stale owner + peek(coreRequestNetwork_in, CPURequestMsg) { + cache_entry.Owner.remove(in_msg.Requestor); + } + } + + action(rR_removeResponderFromSharers, "rR", desc="Remove responder from sharer list") { + peek(responseNetwork_in, ResponseMsg) { + cache_entry.Sharers.remove(in_msg.Sender); + } + } + + action(nC_sendNullWBAckToCore, "nC", desc = "send a null WB Ack to release core") { + // NOTE(review): the message type sent is TDSysWBNack although the name and desc say Ack + peek(coreRequestNetwork_in, CPURequestMsg) { + enqueue(responseToCore_out, ResponseMsg, 1) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:TDSysWBNack; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := 
in_msg.MessageSize; + } + } + } + + action(nT_sendNullWBAckToTCC, "nT", desc = "send a null WB Ack to release TCC") { + peek(w_TCCRequest_in, CPURequestMsg) { + enqueue(w_respTCC_out, ResponseMsg, 1) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:TDSysWBAck; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := in_msg.MessageSize; + } + } + } + + action(eto_moveExSharerToOwner, "eto", desc="move the current exclusive sharer to owner") { + assert(cache_entry.Sharers.count() == 1); + assert(cache_entry.Owner.count() == 0); + cache_entry.Owner := cache_entry.Sharers; + cache_entry.Sharers.clear(); + APPEND_TRANSITION_COMMENT(" new owner "); + APPEND_TRANSITION_COMMENT(cache_entry.Owner); + } + + action(aT_addTCCToSharers, "aT", desc="Add TCC to sharer list") { + peek(w_TCCUnblock_in, UnblockMsg) { + cache_entry.Sharers.add(in_msg.Sender); + } + } + + action(as_addToSharers, "as", desc="Add unblocker to sharer list") { + peek(unblockNetwork_in, UnblockMsg) { + cache_entry.Sharers.add(in_msg.Sender); + } + } + + action(c_moveOwnerToSharer, "cc", desc="Move owner to sharers") { + cache_entry.Sharers.addNetDest(cache_entry.Owner); + cache_entry.Owner.clear(); + } + + action(cc_clearSharers, "\c", desc="Clear the sharers field") { + cache_entry.Sharers.clear(); + } + + action(e_ownerIsUnblocker, "e", desc="The owner is now the unblocker") { + peek(unblockNetwork_in, UnblockMsg) { + cache_entry.Owner.clear(); + cache_entry.Owner.add(in_msg.Sender); + APPEND_TRANSITION_COMMENT(" tcp_ub owner "); + APPEND_TRANSITION_COMMENT(cache_entry.Owner); + } + } + + action(eT_ownerIsUnblocker, "eT", desc="TCC (unblocker) is now owner") { + peek(w_TCCUnblock_in, UnblockMsg) { + cache_entry.Owner.clear(); + cache_entry.Owner.add(in_msg.Sender); + APPEND_TRANSITION_COMMENT(" tcc_ub owner "); + APPEND_TRANSITION_COMMENT(cache_entry.Owner); + } + } + + action(ctr_copyTCCResponseToTBE, "ctr", desc="Copy TCC probe response 
data to TBE") { + peek(w_TCCResponse_in, ResponseMsg) { + // Overwrite data if tbe does not hold dirty data. Stop once it is dirty. + if(tbe.Dirty == false) { + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty; + tbe.Sender := in_msg.Sender; + } + DPRINTF(RubySlicc, "%s\n", (tbe.DataBlk)); + } + } + + action(ccr_copyCoreResponseToTBE, "ccr", desc="Copy core probe response data to TBE") { + peek(responseNetwork_in, ResponseMsg) { + // Overwrite data if tbe does not hold dirty data. Stop once it is dirty. + if(tbe.Dirty == false) { + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty; + + if(tbe.Sender == machineID) { + tbe.Sender := in_msg.Sender; + } + } + DPRINTF(RubySlicc, "%s\n", (tbe.DataBlk)); + } + } + + action(cd_clearDirtyBitTBE, "cd", desc="Clear Dirty bit in TBE") { + tbe.Dirty := false; + } + + action(n_issueRdBlk, "n-", desc="Issue RdBlk") { + enqueue(requestToNB_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlk; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + } + } + + action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") { + enqueue(requestToNB_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlkS; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + } + } + + action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") { + enqueue(requestToNB_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlkM; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + } + } + + action(rU_rememberUpgrade, "rU", desc="Remember that this was an upgrade") { + tbe.Upgrade := true; + } + + 
action(ruo_rememberUntransferredOwner, "ruo", desc="Remember the untransferred owner") { + peek(responseNetwork_in, ResponseMsg) { + if(in_msg.UntransferredOwner == true) { + tbe.UntransferredOwner := in_msg.Sender; + tbe.UntransferredOwnerExists := true; + } + DPRINTF(RubySlicc, "%s\n", (in_msg)); + } + } + + action(ruoT_rememberUntransferredOwnerTCC, "ruoT", desc="Remember the untransferred owner") { + peek(w_TCCResponse_in, ResponseMsg) { + if(in_msg.UntransferredOwner == true) { + tbe.UntransferredOwner := in_msg.Sender; + tbe.UntransferredOwnerExists := true; + } + DPRINTF(RubySlicc, "%s\n", (in_msg)); + } + } + + action(vd_victim, "vd", desc="Victimize M/O Data") { + enqueue(requestToNB_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Type := CoherenceRequestType:VicDirty; + if (cache_entry.CacheState == State:O) { + out_msg.Shared := true; + } else { + out_msg.Shared := false; + } + out_msg.Dirty := true; + } + } + + action(vc_victim, "vc", desc="Victimize E/S Data") { + enqueue(requestToNB_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Type := CoherenceRequestType:VicClean; + if (cache_entry.CacheState == State:S) { + out_msg.Shared := true; + } else { + out_msg.Shared := false; + } + out_msg.Dirty := false; + } + } + + + action(sT_sendRequestToTCC, "sT", desc="send request to TCC") { + peek(coreRequestNetwork_in, CPURequestMsg) { + enqueue(w_requestTCC_out, CPURequestMsg, 1) { + out_msg.addr := address; + out_msg.Type := in_msg.Type; + out_msg.Requestor := in_msg.Requestor; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, 
TCC_select_num_bits)); + out_msg.Shared := in_msg.Shared; + out_msg.MessageSize := in_msg.MessageSize; + } + APPEND_TRANSITION_COMMENT(" requestor "); + APPEND_TRANSITION_COMMENT(in_msg.Requestor); + + } + } + + + action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") { + MachineID tcc := mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits); + + temp := cache_entry.Sharers; + temp.addNetDest(cache_entry.Owner); + if (temp.isElement(tcc)) { + temp.remove(tcc); + } + if (temp.count() > 0) { + enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbDowngrade; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination := temp; + tbe.NumPendingAcks := temp.count(); + if(cache_entry.CacheState == State:M) { + assert(tbe.NumPendingAcks == 1); + } + DPRINTF(RubySlicc, "%s\n", (out_msg)); + } + } + } + + action(ls2_probeShrL2Data, "ls2", desc="local probe downgrade L2, return data") { + MachineID tcc := mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits); + if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { + enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbDowngrade; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.add(tcc); + tbe.NumPendingAcks := tbe.NumPendingAcks + 1; + DPRINTF(RubySlicc, "%s\n", out_msg); + + } + } + } + + action(s2_probeShrL2Data, "s2", desc="probe shared L2, return data") { + MachineID tcc := mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits); + if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { + enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbDowngrade; + out_msg.ReturnData 
:= true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.add(tcc); + tbe.NumPendingAcks := tbe.NumPendingAcks + 1; + DPRINTF(RubySlicc, "%s\n", out_msg); + + } + } + } + + action(ldc_probeInvCoreData, "ldc", desc="local probe to inv cores, return data") { + MachineID tcc := mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits); + peek(coreRequestNetwork_in, CPURequestMsg) { + NetDest dest:= cache_entry.Sharers; + dest.addNetDest(cache_entry.Owner); + if(dest.isElement(tcc)){ + dest.remove(tcc); + } + dest.remove(in_msg.Requestor); + tbe.NumPendingAcks := dest.count(); + if (dest.count()>0){ + enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + + out_msg.Destination.addNetDest(dest); + if(cache_entry.CacheState == State:M) { + assert(tbe.NumPendingAcks == 1); + } + + DPRINTF(RubySlicc, "%s\n", (out_msg)); + } + } + } + } + + action(ld2_probeInvL2Data, "ld2", desc="local probe inv L2, return data") { + MachineID tcc := mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits); + if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { + enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.add(tcc); + tbe.NumPendingAcks := tbe.NumPendingAcks + 1; + DPRINTF(RubySlicc, "%s\n", out_msg); + + } + } + } + + action(dc_probeInvCoreData, "dc", desc="probe inv cores + TCC, return data") { + MachineID tcc := mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits); + enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + 
out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + + out_msg.Destination.addNetDest(cache_entry.Sharers); + out_msg.Destination.addNetDest(cache_entry.Owner); + tbe.NumPendingAcks := cache_entry.Sharers.count() + cache_entry.Owner.count(); + if(cache_entry.CacheState == State:M) { + assert(tbe.NumPendingAcks == 1); + } + if (out_msg.Destination.isElement(tcc)) { + out_msg.Destination.remove(tcc); + tbe.NumPendingAcks := tbe.NumPendingAcks - 1; + } + + DPRINTF(RubySlicc, "%s\n", (out_msg)); + } + } + + action(d2_probeInvL2Data, "d2", desc="probe inv L2, return data") { + MachineID tcc := mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits); + if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { + enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.add(tcc); + tbe.NumPendingAcks := tbe.NumPendingAcks + 1; + DPRINTF(RubySlicc, "%s\n", out_msg); + + } + } + } + + action(lpc_probeInvCore, "lpc", desc="local probe inv cores, no data") { + peek(coreRequestNetwork_in, CPURequestMsg) { + TCC_dir_subtree.broadcast(MachineType:TCP); + TCC_dir_subtree.broadcast(MachineType:SQC); + + temp := cache_entry.Sharers; + temp := temp.OR(cache_entry.Owner); + TCC_dir_subtree := TCC_dir_subtree.AND(temp); + tbe.NumPendingAcks := TCC_dir_subtree.count(); + if(cache_entry.CacheState == State:M) { + assert(tbe.NumPendingAcks == 1); + } + if(TCC_dir_subtree.isElement(in_msg.Requestor)) { + TCC_dir_subtree.remove(in_msg.Requestor); + tbe.NumPendingAcks := tbe.NumPendingAcks - 1; + } + + if(TCC_dir_subtree.count() > 0) { + enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := false; + out_msg.MessageSize := MessageSizeType:Control; + 
out_msg.localCtoD := true; + + out_msg.Destination.addNetDest(TCC_dir_subtree); + + DPRINTF(RubySlicc, "%s\n", (out_msg)); + } + } + } + }

// Send a no-data invalidating probe to every TCP/SQC that the directory
// entry records as a sharer or owner. The number of probed cores becomes
// the pending-ack count; nothing is sent when that count is zero.
action(ipc_probeInvCore, "ipc", desc="probe inv cores, no data") {
  TCC_dir_subtree.broadcast(MachineType:TCP);
  TCC_dir_subtree.broadcast(MachineType:SQC);

  // Restrict the broadcast set to cores that actually hold the block.
  temp := cache_entry.Sharers;
  temp := temp.OR(cache_entry.Owner);
  TCC_dir_subtree := TCC_dir_subtree.AND(temp);
  tbe.NumPendingAcks := TCC_dir_subtree.count();
  if(TCC_dir_subtree.count() > 0) {

    enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
      out_msg.addr := address;
      out_msg.Type := ProbeRequestType:PrbInv;
      out_msg.ReturnData := false;
      out_msg.MessageSize := MessageSizeType:Control;

      out_msg.Destination.addNetDest(TCC_dir_subtree);
      if(cache_entry.CacheState == State:M) {
        assert(tbe.NumPendingAcks == 1);
      }

      DPRINTF(RubySlicc, "%s\n", (out_msg));
    }
  }
}

// Send a no-data invalidating probe to the TCC bank mapped to this address,
// but only if the directory lists that TCC as sharer or owner. Adds one to
// the pending-ack count (it does not reset it).
action(i2_probeInvL2, "i2", desc="probe inv L2, no data") {
  MachineID tcc := mapAddressToRange(address,MachineType:TCC,
                                     TCC_select_low_bit, TCC_select_num_bits);
  if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
    enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
      tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
      out_msg.addr := address;
      out_msg.Type := ProbeRequestType:PrbInv;
      out_msg.ReturnData := false;
      out_msg.MessageSize := MessageSizeType:Control;
      out_msg.Destination.add(tcc);
      DPRINTF(RubySlicc, "%s\n", out_msg);
    }
  }
}

// Probe ack to the NB directory: miss, clean, Ntsl set, no data.
action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
  enqueue(responseToNB_out, ResponseMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
    out_msg.Sender := machineID;
    out_msg.Destination.add(map_Address_to_Directory(address));
    out_msg.Dirty := false;
    out_msg.Hit := false;
    out_msg.Ntsl := true;
    out_msg.State := CoherenceState:NA;
    out_msg.MessageSize := MessageSizeType:Response_Control;
  }
}

// Same message contents as pi_sendProbeResponseInv; kept as a separate
// action under its own name.
action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
  enqueue(responseToNB_out, ResponseMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
    out_msg.Sender := machineID;
    out_msg.Destination.add(map_Address_to_Directory(address));
    out_msg.Dirty := false;
    out_msg.Ntsl := true;
    out_msg.Hit := false;
    out_msg.State := CoherenceState:NA;
    out_msg.MessageSize := MessageSizeType:Response_Control;
  }
}

// Probe ack to the NB directory reporting a miss: no hit, clean, Ntsl clear.
action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
  enqueue(responseToNB_out, ResponseMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
    out_msg.Sender := machineID;
    out_msg.Destination.add(map_Address_to_Directory(address));
    out_msg.Dirty := false; // only true if sending back data i think
    out_msg.Hit := false;
    out_msg.Ntsl := false;
    out_msg.State := CoherenceState:NA;
    out_msg.MessageSize := MessageSizeType:Response_Control;
  }
}

// Probe ack to the NB directory carrying the block from getDataBlock().
// Dirty is copied from the TBE only when a TBE exists; otherwise it is left
// at the message's default.
action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
  enqueue(responseToNB_out, ResponseMsg, issue_latency) {
    assert(is_valid(cache_entry) || is_valid(tbe));
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp;
    out_msg.Sender := machineID;
    out_msg.Destination.add(map_Address_to_Directory(address));
    out_msg.DataBlk := getDataBlock(address);
    if (is_valid(tbe)) {
      out_msg.Dirty := tbe.Dirty;
    }
    out_msg.Hit := true;
    out_msg.State := CoherenceState:NA;
    out_msg.MessageSize := MessageSizeType:Response_Data;
  }
}

// Variant of pd_sendProbeResponseData that requires a valid directory entry.
action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
  enqueue(responseToNB_out, ResponseMsg, issue_latency) {
    // A valid cache_entry is mandatory here, which already covers the weaker
    // "cache_entry or tbe" requirement of pd_sendProbeResponseData.
    assert(is_valid(cache_entry));
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp;
    out_msg.Sender := machineID;
    out_msg.Destination.add(map_Address_to_Directory(address));
    out_msg.DataBlk := getDataBlock(address);
    if (is_valid(tbe)) {
      out_msg.Dirty := tbe.Dirty;
    }
    out_msg.Hit := true;
    out_msg.State := CoherenceState:NA;
    out_msg.MessageSize := MessageSizeType:Response_Data;
  }
}

action(mc_cancelWB, "mc", desc="send writeback cancel to NB directory") {
  enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceRequestType:WrCancel;
    out_msg.Destination.add(map_Address_to_Directory(address));
    out_msg.Requestor := machineID;
    out_msg.MessageSize := MessageSizeType:Request_Control;
  }
}

// One Shared response, built from the TBE, addressed to every requestor
// accumulated in cache_entry.MergedSharers.
action(sCS_sendCollectiveResponseS, "sCS", desc="send shared response to all merged TCP/SQC") {
  enqueue(responseToCore_out, ResponseMsg, 1) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:TDSysResp;
    out_msg.Sender := tbe.Sender;
    out_msg.DataBlk := tbe.DataBlk;
    out_msg.MessageSize := MessageSizeType:Response_Data;
    out_msg.CtoD := false;
    out_msg.State := CoherenceState:Shared;
    out_msg.Destination.addNetDest(cache_entry.MergedSharers);
    out_msg.Shared := tbe.Shared;
    out_msg.Dirty := tbe.Dirty;
    DPRINTF(RubySlicc, "%s\n", out_msg);
  }
}

// Shared response, built from the TBE, to the single original requestor.
action(sS_sendResponseS, "sS", desc="send shared response to TCP/SQC") {
  enqueue(responseToCore_out, ResponseMsg, 1) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:TDSysResp;
    out_msg.Sender := tbe.Sender;
    out_msg.DataBlk := tbe.DataBlk;
    out_msg.MessageSize := MessageSizeType:Response_Data;
    out_msg.CtoD := false;
    out_msg.State := CoherenceState:Shared;
    out_msg.Destination.add(tbe.OriginalRequestor);
    out_msg.Shared := tbe.Shared;
    out_msg.Dirty := tbe.Dirty;
    DPRINTF(RubySlicc, "%s\n", out_msg);
  }
}

// Modified response, built from the TBE, to the single original requestor.
action(sM_sendResponseM, "sM", desc="send response to TCP/SQC") {
  enqueue(responseToCore_out, ResponseMsg, 1) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:TDSysResp;
    out_msg.Sender := tbe.Sender;
    out_msg.DataBlk := tbe.DataBlk;
    out_msg.MessageSize := MessageSizeType:Response_Data;
    out_msg.CtoD := false;
    out_msg.State := CoherenceState:Modified;
    out_msg.Destination.add(tbe.OriginalRequestor);
    out_msg.Shared := tbe.Shared;
    out_msg.Dirty := tbe.Dirty;
    DPRINTF(RubySlicc, "%s\n", out_msg);
  }
}

// Relay a writeback ack to the original requestor, unless the transaction
// was initiated by this machine itself.
action(fw2_forwardWBAck, "fw2", desc="forward WBAck to TCC") {
  peek(responseFromNB_in, ResponseMsg) {
    if(tbe.OriginalRequestor != machineID) {
      enqueue(w_respTCC_out, ResponseMsg, 1) {
        out_msg.addr := address;
        out_msg.Type := CoherenceResponseType:TDSysWBAck;
        out_msg.Sender := machineID;
        //out_msg.DataBlk := tbe.DataBlk;
        out_msg.Destination.add(tbe.OriginalRequestor);
        out_msg.MessageSize := in_msg.MessageSize;
      }
    }
  }
}

// Stash the fields of the NB response in the TBE so that the ack can be
// forwarded later by fsa_forwardSavedAck.
action(sa_saveSysAck, "sa", desc="Save SysAck ") {
  peek(responseFromNB_in, ResponseMsg) {
    tbe.Dirty := in_msg.Dirty;
    // Take the NB's data only when the response is clean; otherwise the
    // data already held in the TBE is kept as is.
    if (tbe.Dirty == false) {
      tbe.DataBlk := in_msg.DataBlk;
    }
    tbe.CtoD := in_msg.CtoD;
    tbe.CohState := in_msg.State;
    tbe.Shared := in_msg.Shared;
    tbe.MessageSize := in_msg.MessageSize;
  }
}

// Forward, to the original requestor, the response fields previously stored
// in the TBE.
action(fsa_forwardSavedAck, "fsa", desc="forward saved SysAck to TCP or SQC") {
  enqueue(responseToCore_out, ResponseMsg, 1) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:TDSysResp;
    // The data always comes from the TBE, whatever the dirty bit says.
    out_msg.DataBlk := tbe.DataBlk;
    out_msg.CtoD := tbe.CtoD;
    out_msg.State := tbe.CohState;
    out_msg.Destination.add(tbe.OriginalRequestor);
    out_msg.Shared := tbe.Shared;
    out_msg.MessageSize := tbe.MessageSize;
    out_msg.Dirty := tbe.Dirty;
    out_msg.Sender := tbe.Sender;
  }
}

// Forward the NB response currently at the head of responseFromNB_in to the
// original requestor.
action(fa_forwardSysAck, "fa", desc="forward SysAck to TCP or SQC") {
  peek(responseFromNB_in, ResponseMsg) {
    enqueue(responseToCore_out, ResponseMsg, 1) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:TDSysResp;
      if (tbe.Dirty == false) {
        // TBE holds no dirty data: pass on the NB's block and record this
        // machine as the sender.
        out_msg.DataBlk := in_msg.DataBlk;
        tbe.Sender := machineID;
      }
      else {
        out_msg.DataBlk := tbe.DataBlk;
      }
      out_msg.CtoD := in_msg.CtoD;
      out_msg.State := in_msg.State;
      out_msg.Destination.add(tbe.OriginalRequestor);
      out_msg.Shared := in_msg.Shared;
      out_msg.MessageSize := in_msg.MessageSize;
      out_msg.Dirty := in_msg.Dirty;
      out_msg.Sender := tbe.Sender;
      DPRINTF(RubySlicc, "%s\n", (out_msg.DataBlk));
    }
  }
}

// Send a data-returning downgrade probe to the current owner: the TCC bank
// for this address if it is the owner, otherwise the owning core(s).
// Exactly one ack is then expected.
action(pso_probeSharedDataOwner, "pso", desc="probe shared data at owner") {
  MachineID tcc := mapAddressToRange(address,MachineType:TCC,
                                     TCC_select_low_bit, TCC_select_num_bits);
  if (cache_entry.Owner.isElement(tcc)) {
    enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
      out_msg.addr := address;
      out_msg.Type := ProbeRequestType:PrbDowngrade;
      out_msg.ReturnData := true;
      out_msg.MessageSize := MessageSizeType:Control;
      out_msg.Destination.add(tcc);
      DPRINTF(RubySlicc, "%s\n", out_msg);
    }
  }
  else { // i.e., owner is a core
    enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
      out_msg.addr := address;
      out_msg.Type := ProbeRequestType:PrbDowngrade;
      out_msg.ReturnData := true;
      out_msg.MessageSize := MessageSizeType:Control;
      out_msg.Destination.addNetDest(cache_entry.Owner);
      DPRINTF(RubySlicc, "%s\n", out_msg);
    }
  }
  tbe.NumPendingAcks := 1;
}

// ---- Queue pops: each action dequeues the head of one input port. ----

action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") {
  coreRequestNetwork_in.dequeue(clockEdge());
}

action(j_popIncomingUnblockQueue, "j", desc="Pop incoming unblock queue") {
  unblockNetwork_in.dequeue(clockEdge());
}

action(pk_popResponseQueue, "pk", desc="Pop response queue") {
  responseNetwork_in.dequeue(clockEdge());
}

action(pp_popProbeQueue, "pp", desc="Pop incoming probe queue") {
  probeNetwork_in.dequeue(clockEdge());
}

action(pR_popResponseFromNBQueue, "pR", desc="Pop incoming Response queue From NB") {
  responseFromNB_in.dequeue(clockEdge());
}

action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
  triggerQueue_in.dequeue(clockEdge());
}

action(pl_popTCCRequestQueue, "pl", desc="pop TCC request queue") {
  w_TCCRequest_in.dequeue(clockEdge());
}

action(plr_popTCCResponseQueue, "plr", desc="pop TCC response queue") {
  w_TCCResponse_in.dequeue(clockEdge());
}

action(plu_popTCCUnblockQueue, "plu", desc="pop TCC unblock queue") {
  w_TCCUnblock_in.dequeue(clockEdge());
}

// Move the unblocking core from MergedSharers to Sharers and account for
// one fewer outstanding unblock.
action(m_addUnlockerToSharers, "m", desc="Add the unlocker to the sharer list") {
  peek(unblockNetwork_in, UnblockMsg) {
    cache_entry.Sharers.add(in_msg.Sender);
    cache_entry.MergedSharers.remove(in_msg.Sender);
    // NOTE(review): this permits WaitingUnblocks == 0 before the decrement,
    // which would leave the counter negative -- confirm whether "> 0" was
    // intended.
    assert(cache_entry.WaitingUnblocks >= 0);
    cache_entry.WaitingUnblocks := cache_entry.WaitingUnblocks - 1;
  }
}

// Record the requestor as a merged sharer and expect one more unblock.
action(q_addOutstandingMergedSharer, "q", desc="Increment outstanding requests") {
  peek(coreRequestNetwork_in, CPURequestMsg) {
    cache_entry.MergedSharers.add(in_msg.Requestor);
    cache_entry.WaitingUnblocks := cache_entry.WaitingUnblocks + 1;
  }
}

action(uu_sendUnblock, "uu", desc="state changed, unblock") {
  enqueue(unblockToNB_out, UnblockMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Destination.add(map_Address_to_Directory(address));
    out_msg.MessageSize := MessageSizeType:Unblock_Control;
    DPRINTF(RubySlicc, "%s\n", out_msg);
  }
}

// ---- Queue recycles: re-queue the head message after recycle_latency. ----

action(zz_recycleRequest, "\z", desc="Recycle the request queue") {
  coreRequestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

action(yy_recycleTCCRequestQueue, "yy", desc="recycle TCC request queue") {
  w_TCCRequest_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

action(xz_recycleResponseQueue, "xz", desc="recycle response queue") {
  responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}
action(xx_recycleTCCResponseQueue, "xx", desc="recycle TCC response queue") {
  w_TCCResponse_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

action(vv_recycleTCCUnblockQueue, "vv", desc="Recycle the TCC unblock queue") {
  w_TCCUnblock_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

// Recycles the core unblock port (the one j_popIncomingUnblockQueue pops).
// The TCC unblock port has its own action, vv_recycleTCCUnblockQueue.
action(xy_recycleUnblockQueue, "xy", desc="Recycle the unblock queue") {
  unblockNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

action(ww_recycleProbeRequest, "ww", desc="Recycle the probe request queue") {
  probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

action(x_decrementAcks, "x", desc="decrement Acks pending") {
  tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
}

// Post an AcksComplete trigger once no acks remain outstanding, and append
// the current ack count to the transition comment.
action(o_checkForAckCompletion, "o", desc="check for ack completion") {
  if (tbe.NumPendingAcks == 0) {
    enqueue(triggerQueue_out, TriggerMsg, 1) {
      out_msg.addr := address;
      out_msg.Type := TriggerType:AcksComplete;
    }
  }
  APPEND_TRANSITION_COMMENT(" tbe acks ");
  APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
}

// TBE for a probe arriving from the NB: no data is copied and no requestor
// is recorded.
action(tp_allocateTBE, "tp", desc="allocate TBE Entry for upward transactions") {
  check_allocate(TBEs);
  peek(probeNetwork_in, NBProbeRequestMsg) {
    TBEs.allocate(address);
    set_tbe(TBEs.lookup(address));
    tbe.Dirty := false;
    tbe.NumPendingAcks := 0;
    tbe.UntransferredOwnerExists := false;
  }
}

// TBE for a request arriving from the TCC: data and requestor are taken
// from the incoming message.
action(tv_allocateTBE, "tv", desc="allocate TBE Entry for TCC transactions") {
  check_allocate(TBEs);
  peek(w_TCCRequest_in, CPURequestMsg) {
    TBEs.allocate(address);
    set_tbe(TBEs.lookup(address));
    tbe.DataBlk := in_msg.DataBlk; // Data only for WBs
    tbe.Dirty := false;
    tbe.OriginalRequestor := in_msg.Requestor;
    tbe.NumPendingAcks := 0;
    tbe.UntransferredOwnerExists := false;
  }
}

// TBE for a core request: data is seeded from the directory entry and the
// requestor from the incoming message; this machine is the initial Sender.
action(t_allocateTBE, "t", desc="allocate TBE Entry") {
  check_allocate(TBEs);//check whether resources are full
  peek(coreRequestNetwork_in, CPURequestMsg) {
    TBEs.allocate(address);
    set_tbe(TBEs.lookup(address));
    tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
    tbe.Dirty := false;
    tbe.Upgrade := false;
    tbe.OriginalRequestor := in_msg.Requestor;
    tbe.NumPendingAcks := 0;
    tbe.UntransferredOwnerExists := false;
    tbe.Sender := machineID;
  }
}

// TBE for a recall: no message is peeked, and this machine is recorded as
// the requestor.
action(tr_allocateTBE, "tr", desc="allocate TBE Entry for recall") {
  check_allocate(TBEs);//check whether resources are full
  TBEs.allocate(address);
  set_tbe(TBEs.lookup(address));
  tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
  tbe.Dirty := false;
  tbe.Upgrade := false;
  tbe.OriginalRequestor := machineID; //Recall request, Self initiated
  tbe.NumPendingAcks := 0;
  tbe.UntransferredOwnerExists := false;
}

action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
  TBEs.deallocate(address);
  unset_tbe();
}

// Allocate a directory entry only if none is currently bound.
action(d_allocateDir, "d", desc="allocate Directory Cache") {
  if (is_invalid(cache_entry)) {
    set_cache_entry(directory.allocate(address, new Entry));
  }
}

// Deallocate the directory entry if one is bound; the binding is cleared
// unconditionally.
action(dd_deallocateDir, "dd", desc="deallocate Directory Cache") {
  if (is_valid(cache_entry)) {
    directory.deallocate(address);
  }
  unset_cache_entry();
}

action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
  enqueue(responseToNB_out, ResponseMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:StaleNotif;
    out_msg.Destination.add(map_Address_to_Directory(address));
    out_msg.Sender := machineID;
    out_msg.MessageSize := MessageSizeType:Response_Control;
  }
}

// Write the TBE's data, dirty bit and shared flag back to the NB directory.
action(wb_data, "wb", desc="write back data") {
  enqueue(responseToNB_out, ResponseMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUData;
    out_msg.Sender := machineID;
    out_msg.Destination.add(map_Address_to_Directory(address));
    out_msg.DataBlk := tbe.DataBlk;
    out_msg.Dirty := tbe.Dirty;
    out_msg.NbReqShared := tbe.Shared;
    out_msg.State := CoherenceState:Shared; // faux info
    out_msg.MessageSize := MessageSizeType:Writeback_Data;
    DPRINTF(RubySlicc, "%s\n", out_msg);
  }
}

action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
  assert(is_valid(tbe));
  tbe.Shared := true;
}

// Merge a core probe response into the TBE. Incoming data replaces the
// TBE's copy unless the TBE is already dirty and the response is clean.
action(y_writeDataToTBE, "y", desc="write Probe Data to TBE") {
  peek(responseNetwork_in, ResponseMsg) {
    if (!tbe.Dirty || in_msg.Dirty) {
      tbe.DataBlk := in_msg.DataBlk;
      tbe.Dirty := in_msg.Dirty;
    }
    if (in_msg.Hit) {
      tbe.Cached := true;
    }
  }
}

// Same merge rule as y_writeDataToTBE, for a response from the TCC port.
action(ty_writeTCCDataToTBE, "ty", desc="write TCC Probe Data to TBE") {
  peek(w_TCCResponse_in, ResponseMsg) {
    if (!tbe.Dirty || in_msg.Dirty) {
      tbe.DataBlk := in_msg.DataBlk;
      tbe.Dirty := in_msg.Dirty;
    }
    if (in_msg.Hit) {
      tbe.Cached := true;
    }
  }
}

action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
  directory.setMRU(address);
}

// TRANSITIONS

// Handling TCP/SQC requests (similar to how NB dir handles TCC events with some changes to account for stateful directory).


// transitions from base
// A read miss in I allocates both a directory entry and a TBE before the
// request is issued.
transition(I, RdBlk, I_ES) {TagArrayRead} {
  d_allocateDir;
  t_allocateTBE;
  n_issueRdBlk;
  i_popIncomingRequestQueue;
}

transition(I, RdBlkS, I_S) {TagArrayRead} {
  d_allocateDir;
  t_allocateTBE;
  nS_issueRdBlkS;
  i_popIncomingRequestQueue;
}

// NB acks are forwarded straight to the original requestor; the directory
// then waits in a BBB_* state for the core's unblock.
transition(I_S, NB_AckS, BBB_S) {
  fa_forwardSysAck;
  pR_popResponseFromNBQueue;
}

transition(I_ES, NB_AckS, BBB_S) {
  fa_forwardSysAck;
  pR_popResponseFromNBQueue;
}

transition(I_ES, NB_AckE, BBB_E) {
  fa_forwardSysAck;
  pR_popResponseFromNBQueue;
}

transition({S_M, O_M}, {NB_AckCtoD, NB_AckM}, BBB_M) {
  fa_forwardSysAck;
  pR_popResponseFromNBQueue;
}

transition(I_M, NB_AckM, BBB_M) {
  fa_forwardSysAck;
  pR_popResponseFromNBQueue;
}

// The core's unblock completes the transaction: the directory bookkeeping
// is updated, the NB is unblocked and the TBE is released.
transition(BBB_M, CoreUnblock, M) {TagArrayWrite} {
  c_clearOwner;
  cc_clearSharers;
  e_ownerIsUnblocker;
  uu_sendUnblock;
  dt_deallocateTBE;
  j_popIncomingUnblockQueue;
}

transition(BBB_S, CoreUnblock, S) {TagArrayWrite} {
  as_addToSharers;
  uu_sendUnblock;
  dt_deallocateTBE;
  j_popIncomingUnblockQueue;
}

transition(BBB_E, CoreUnblock, E) {TagArrayWrite} {
  as_addToSharers;
  uu_sendUnblock;
  dt_deallocateTBE;
  j_popIncomingUnblockQueue;
}


transition(I, RdBlkM, I_M) {TagArrayRead} {
  d_allocateDir;
  t_allocateTBE;
  nM_issueRdBlkM;
  i_popIncomingRequestQueue;
}

//
transition(S, {RdBlk, RdBlkS}, BBS_S) {TagArrayRead} {
  t_allocateTBE;
  sc_probeShrCoreData;
  s2_probeShrL2Data;
  q_addOutstandingMergedSharer;
  i_popIncomingRequestQueue;
}
// Merging of read sharing into a single request
transition(BBS_S, {RdBlk, RdBlkS}) {
  q_addOutstandingMergedSharer;
  i_popIncomingRequestQueue;
}
// Wait for probe acks to be complete
transition(BBS_S, CPUPrbResp) {
  ccr_copyCoreResponseToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

transition(BBS_S, TCCPrbResp) {
  ctr_copyTCCResponseToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

// Window for merging complete with this transition
// Send responses to all outstanding
transition(BBS_S, ProbeAcksComplete, BB_S) {
  sCS_sendCollectiveResponseS;
  pt_popTriggerQueue;
}

// Every merged requestor must unblock; only the last unblock releases the
// TBE and leaves the blocking state.
transition(BB_S, CoreUnblock, BB_S) {
  m_addUnlockerToSharers;
  j_popIncomingUnblockQueue;
}

transition(BB_S, LastCoreUnblock, S) {
  m_addUnlockerToSharers;
  dt_deallocateTBE;
  j_popIncomingUnblockQueue;
}

transition(O, {RdBlk, RdBlkS}, BBO_O) {TagArrayRead} {
  t_allocateTBE;
  pso_probeSharedDataOwner;
  q_addOutstandingMergedSharer;
  i_popIncomingRequestQueue;
}
// Merging of read sharing into a single request
transition(BBO_O, {RdBlk, RdBlkS}) {
  q_addOutstandingMergedSharer;
  i_popIncomingRequestQueue;
}

// Wait for probe acks to be complete
transition(BBO_O, CPUPrbResp) {
  ccr_copyCoreResponseToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

transition(BBO_O, TCCPrbResp) {
  ctr_copyTCCResponseToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

// Window for merging complete with this transition
// Send responses to all outstanding
transition(BBO_O, ProbeAcksComplete, BB_OO) {
  sCS_sendCollectiveResponseS;
  pt_popTriggerQueue;
}

transition(BB_OO, CoreUnblock) {
  m_addUnlockerToSharers;
  j_popIncomingUnblockQueue;
}

transition(BB_OO, LastCoreUnblock, O) {TagArrayWrite} {
  m_addUnlockerToSharers;
  dt_deallocateTBE;
  j_popIncomingUnblockQueue;
}

// Core write-backs: the writing core is dropped from the directory's
// bookkeeping and the request is handed to the TCC; the BW_* states wait
// for the TCC's unblock.
transition(S, CPUWrite, BW_S) {TagArrayRead} {
  t_allocateTBE;
  rC_removeCoreFromSharers;
  sT_sendRequestToTCC;
  i_popIncomingRequestQueue;
}

transition(E, CPUWrite, BW_E) {TagArrayRead} {
  t_allocateTBE;
  rC_removeCoreFromSharers;
  sT_sendRequestToTCC;
  i_popIncomingRequestQueue;
}

transition(O, CPUWrite, BW_O) {TagArrayRead} {
  t_allocateTBE;
  rCo_removeCoreFromOwner;
  rC_removeCoreFromSharers;
  sT_sendRequestToTCC;
  i_popIncomingRequestQueue;
}

transition(M, CPUWrite, BW_M) {TagArrayRead} {
  t_allocateTBE;
  rCo_removeCoreFromOwner;
  rC_removeCoreFromSharers;
  sT_sendRequestToTCC;
  i_popIncomingRequestQueue;
}

transition(BW_S, TCCUnblock_Sharer, S) {TagArrayWrite} {
  aT_addTCCToSharers;
  dt_deallocateTBE;
  plu_popTCCUnblockQueue;
}

transition(BW_S, TCCUnblock_NotValid, S) {TagArrayWrite} {
  dt_deallocateTBE;
  plu_popTCCUnblockQueue;
}

transition(BW_E, TCCUnblock, E) {TagArrayWrite} {
  cc_clearSharers;
  aT_addTCCToSharers;
  dt_deallocateTBE;
  plu_popTCCUnblockQueue;
}

transition(BW_E, TCCUnblock_NotValid, E) {
  dt_deallocateTBE;
  plu_popTCCUnblockQueue;
}

transition(BW_M, TCCUnblock, M) {
  c_clearOwner;
  cc_clearSharers;
  eT_ownerIsUnblocker;
  dt_deallocateTBE;
  plu_popTCCUnblockQueue;
}

transition(BW_M, TCCUnblock_NotValid, M) {
  // Note this transition should only be executed if we received a stale wb
  dt_deallocateTBE;
  plu_popTCCUnblockQueue;
}

transition(BW_O, TCCUnblock, O) {
  c_clearOwner;
  eT_ownerIsUnblocker;
  dt_deallocateTBE;
  plu_popTCCUnblockQueue;
}

transition(BW_O, TCCUnblock_NotValid, O) {
  // Note this transition should only be executed if we received a stale wb
  dt_deallocateTBE;
  plu_popTCCUnblockQueue;
}

// We lost the owner, likely due to an invalidation racing with an 'O' wb
transition(BW_O, TCCUnblock_Sharer, S) {
  c_clearOwner;
  aT_addTCCToSharers;
  dt_deallocateTBE;
  plu_popTCCUnblockQueue;
}

// NB probes are deferred (recycled) while a write-back is in flight.
transition({BW_M, BW_S, BW_E, BW_O}, {PrbInv, PrbInvData, PrbShrData}) {
  ww_recycleProbeRequest;
}

transition(BRWD_I, {PrbInvData, PrbInv, PrbShrData}) {
  ww_recycleProbeRequest;
}

// Three step process: locally invalidate others, issue CtoD, wait for NB_AckCtoD
transition(S, CtoD, BBS_UM) {TagArrayRead} {
  t_allocateTBE;
  lpc_probeInvCore;
  i2_probeInvL2;
  o_checkForAckCompletion;
  i_popIncomingRequestQueue;
}
// Upgrade from S: collect the invalidation acks, then issue the RdBlkM.
transition(BBS_UM, CPUPrbResp, BBS_UM) {
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

transition(BBS_UM, TCCPrbResp) {
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

transition(BBS_UM, ProbeAcksComplete, S_M) {
  rU_rememberUpgrade;
  nM_issueRdBlkM;
  pt_popTriggerQueue;
}

// Three step process: locally invalidate others, issue CtoD, wait for NB_AckCtoD
transition(O, CtoD, BBO_UM) {TagArrayRead} {
  t_allocateTBE;
  lpc_probeInvCore;
  i2_probeInvL2;
  o_checkForAckCompletion;
  i_popIncomingRequestQueue;
}

transition(BBO_UM, CPUPrbResp, BBO_UM) {
  ruo_rememberUntransferredOwner;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

transition(BBO_UM, TCCPrbResp) {
  ruoT_rememberUntransferredOwnerTCC;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

transition(BBO_UM, ProbeAcksComplete, O_M) {
  rU_rememberUpgrade;
  nM_issueRdBlkM;
  pt_popTriggerQueue;
}

// RdBlkM from S/E: invalidate local copies (collecting their data), then
// issue the RdBlkM once all acks have arrived.
transition({S, E}, RdBlkM, BBS_M) {TagArrayWrite} {
  t_allocateTBE;
  ldc_probeInvCoreData;
  ld2_probeInvL2Data;
  o_checkForAckCompletion;
  i_popIncomingRequestQueue;
}

transition(BBS_M, CPUPrbResp) {
  ccr_copyCoreResponseToTBE;
  rR_removeResponderFromSharers;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

transition(BBS_M, TCCPrbResp) {
  ctr_copyTCCResponseToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

transition(BBS_M, ProbeAcksComplete, S_M) {
  nM_issueRdBlkM;
  pt_popTriggerQueue;
}

// RdBlkM from O: same sequence as from S/E, ending in O_M.
transition(O, RdBlkM, BBO_M) {TagArrayRead} {
  t_allocateTBE;
  ldc_probeInvCoreData;
  ld2_probeInvL2Data;
  o_checkForAckCompletion;
  i_popIncomingRequestQueue;
}

transition(BBO_M, CPUPrbResp) {
  ccr_copyCoreResponseToTBE;
  rR_removeResponderFromSharers;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

transition(BBO_M, TCCPrbResp) {
  ctr_copyTCCResponseToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

transition(BBO_M, ProbeAcksComplete, O_M) {
  nM_issueRdBlkM;
  pt_popTriggerQueue;
}

//
// RdBlkM from M: answered locally with sM_sendResponseM once the acks are
// in; no request is issued to the NB on this path.
transition(M, RdBlkM, BBM_M) {TagArrayRead} {
  t_allocateTBE;
  ldc_probeInvCoreData;
  ld2_probeInvL2Data;
  i_popIncomingRequestQueue;
}

transition(BBM_M, CPUPrbResp) {
  ccr_copyCoreResponseToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

// TCP recalled block before receiving probe
transition({BBM_M, BBS_M, BBO_M}, {CPUWrite, NoCPUWrite}) {
  zz_recycleRequest;
}

transition(BBM_M, TCCPrbResp) {
  ctr_copyTCCResponseToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

transition(BBM_M, ProbeAcksComplete, BB_M) {
  sM_sendResponseM;
  pt_popTriggerQueue;
}

transition(BB_M, CoreUnblock, M) {TagArrayWrite} {
  e_ownerIsUnblocker;
  dt_deallocateTBE;
  j_popIncomingUnblockQueue;
}

// Reads from M/E: probe for shared data, answer with sS_sendResponseS and
// finish in O after the requestor unblocks.
transition(M, {RdBlkS, RdBlk}, BBM_O) {TagArrayRead} {
  t_allocateTBE;
  sc_probeShrCoreData;
  s2_probeShrL2Data;
  i_popIncomingRequestQueue;
}

transition(E, {RdBlkS, RdBlk}, BBM_O) {TagArrayRead} {
  t_allocateTBE;
  eto_moveExSharerToOwner;
  sc_probeShrCoreData;
  s2_probeShrL2Data;
  i_popIncomingRequestQueue;
}

transition(BBM_O, CPUPrbResp) {
  ccr_copyCoreResponseToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}
transition(BBM_O, TCCPrbResp) {
  ctr_copyTCCResponseToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}
transition(BBM_O, ProbeAcksComplete, BB_O) {
  sS_sendResponseS;
  pt_popTriggerQueue;
}

transition(BB_O, CoreUnblock, O) {TagArrayWrite} {
  as_addToSharers;
  dt_deallocateTBE;
  j_popIncomingUnblockQueue;
}

// NB probes are deferred (recycled) in every locally-blocked state.
transition({BBO_O, BBM_M, BBS_S, BBM_O, BB_M, BB_O, BB_S, BBO_UM, BBS_UM, BBS_M, BBO_M, BB_OO}, {PrbInvData, PrbInv, PrbShrData}) {
  ww_recycleProbeRequest;
}

transition({BBM_O, BBS_S, CP_S, CP_O, CP_SM, CP_OM, BBO_O}, {CPUWrite, NoCPUWrite}) {
  zz_recycleRequest;
}

// stale CtoD raced with external invalidation
transition({I, CP_I, B_I, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, CtoD) {
  i_popIncomingRequestQueue;
}

// stale CtoD raced with internal RdBlkM
transition({BBM_M, BBS_M, BBO_M, BBB_M, BBS_UM, BBO_UM}, CtoD) {
  i_popIncomingRequestQueue;
}

transition({E, M}, CtoD) {
  i_popIncomingRequestQueue;
}


// TCC-directory has sent out (And potentially received acks for) probes.
// TCP/SQC replacement (known to be stale subsequent) are popped off.
transition({BBO_UM, BBS_UM}, {CPUWrite, NoCPUWrite}) {
  nC_sendNullWBAckToCore;
  i_popIncomingRequestQueue;
}

transition(S_M, {NoCPUWrite, CPUWrite}) {
  zz_recycleRequest;
}

transition(O_M, {NoCPUWrite, CPUWrite}) {
  zz_recycleRequest;
}


transition({BBM_M, BBS_M, BBO_M, BBO_UM, BBS_UM}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
  nT_sendNullWBAckToTCC;
  pl_popTCCRequestQueue;
}

transition({CP_S, CP_O, CP_OM, CP_SM}, {VicDirty, VicClean, VicDirtyLast, CancelWB, NoVic}) {
  yy_recycleTCCRequestQueue;
}

// However, when TCCdir has sent out PrbSharedData, one cannot ignore.
transition({BBS_S, BBO_O, BBM_O, S_M, O_M, BBB_M, BBB_S, BBB_E}, {VicDirty, VicClean, VicDirtyLast, CancelWB}) {
  yy_recycleTCCRequestQueue;
}

transition({BW_S, BW_E, BW_O, BW_M}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
  yy_recycleTCCRequestQueue;
}

transition({BW_S, BW_E, BW_O, BW_M}, CancelWB) {
  nT_sendNullWBAckToTCC;
  pl_popTCCRequestQueue;
}


/// recycle if waiting for unblocks.
transition({BB_M, BB_O, BB_S, BB_OO}, {VicDirty, VicClean, VicDirtyLast, NoVic, CancelWB}) {
  yy_recycleTCCRequestQueue;
}

transition({BBS_S, BBO_O}, NoVic) {
  rT_removeTCCFromSharers;
  nT_sendNullWBAckToTCC;
  pl_popTCCRequestQueue;
}

// stale. Pop message and send dummy ack.
transition({I_S, I_ES, I_M}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
  nT_sendNullWBAckToTCC;
  pl_popTCCRequestQueue;
}

// TCC victimizations from stable states: allocate a TBE from the TCC
// request (tv_allocateTBE) and start the victim write-back.
transition(M, VicDirtyLast, VM_I) {TagArrayRead} {
  tv_allocateTBE;
  vd_victim;
  pl_popTCCRequestQueue;
}

transition(E, VicDirty, VM_I) {TagArrayRead} {
  tv_allocateTBE;
  vd_victim;
  pl_popTCCRequestQueue;
}

transition(O, VicDirty, VO_S) {TagArrayRead} {
  tv_allocateTBE;
  vd_victim;
  pl_popTCCRequestQueue;
}

transition(O, {VicDirtyLast, VicClean}, VO_I) {TagArrayRead} {
  tv_allocateTBE;
  vd_victim;
  pl_popTCCRequestQueue;
}

transition({E, S}, VicClean, VES_I) {TagArrayRead} {
  tv_allocateTBE;
  vc_victim;
  pl_popTCCRequestQueue;
}

// "No victim" notifications only update the directory bookkeeping and are
// answered with a null write-back ack; the state does not change.
transition({O, S}, NoVic) {TagArrayRead} {
  rT_removeTCCFromSharers;
  nT_sendNullWBAckToTCC;
  pl_popTCCRequestQueue;
}

transition({O, S}, NoCPUWrite) {TagArrayRead} {
  rC_removeCoreFromSharers;
  nC_sendNullWBAckToCore;
  i_popIncomingRequestQueue;
}

transition({M, E}, NoCPUWrite) {TagArrayRead} {
  rC_removeCoreFromSharers;
  nC_sendNullWBAckToCore;
  i_popIncomingRequestQueue;
}

// This can only happen if it is a race. (TCCdir sent out probes which caused this cancel in the first place.)
transition({VM_I, VES_I, VO_I}, CancelWB) {
  pl_popTCCRequestQueue;
}

// NB write-back ack for a victim: send the data down (wb_data), relay the
// ack (fw2_forwardWBAck), then release the TBE and the directory entry.
transition({VM_I, VES_I, VO_I}, NB_AckWB, I) {TagArrayWrite} {
  c_clearOwner;
  cc_clearSharers;
  wb_data;
  fw2_forwardWBAck;
  dt_deallocateTBE;
  dd_deallocateDir;
  pR_popResponseFromNBQueue;
}

// From VO_S the directory entry is kept: the line ends in S.
transition(VO_S, NB_AckWB, S) {TagArrayWrite} {
  c_clearOwner;
  wb_data;
  fw2_forwardWBAck;
  dt_deallocateTBE;
  pR_popResponseFromNBQueue;
}

// In I_C / I_W a stale notification is sent instead of data.
transition(I_C, NB_AckWB, I) {TagArrayWrite} {
  c_clearOwner;
  cc_clearSharers;
  ss_sendStaleNotification;
  fw2_forwardWBAck;
  dt_deallocateTBE;
  dd_deallocateDir;
  pR_popResponseFromNBQueue;
}

transition(I_W, NB_AckWB, I) {
  ss_sendStaleNotification;
  dt_deallocateTBE;
  dd_deallocateDir;
  pR_popResponseFromNBQueue;
}



// Do not handle replacements, reads of any kind or writebacks from transients; recycle
transition({I_M, I_ES, I_S, MO_I, ES_I, S_M, O_M, VES_I, VO_I, VO_S, VM_I, I_C, I_W}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
  zz_recycleRequest;
}

transition(VO_S, NoCPUWrite) {
  zz_recycleRequest;
}

transition({BW_M, BW_S, BW_O, BW_E}, {RdBlkS, RdBlkM, RdBlk, CtoD, NoCPUWrite, CPUWrite}) {
  zz_recycleRequest;
}

transition({BBB_M, BBB_S, BBB_E, BB_O, BB_M, BB_S, BB_OO}, {RdBlk, RdBlkS, RdBlkM, CPUWrite, NoCPUWrite}) {
  zz_recycleRequest;
}

transition({BBB_S, BBB_E, BB_O, BB_S, BB_OO}, {CtoD}) {
  zz_recycleRequest;
}

transition({BBS_UM, BBO_UM, BBM_M, BBM_O, BBS_M, BBO_M}, {RdBlk, RdBlkS, RdBlkM}) {
  zz_recycleRequest;
}

transition(BBM_O, CtoD) {
  zz_recycleRequest;
}

transition({BBS_S, BBO_O}, {RdBlkM, CtoD}) {
  zz_recycleRequest;
}

transition({B_I, CP_I, CP_S, CP_O, CP_OM, CP_SM, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, {RdBlk, RdBlkS, RdBlkM}) {
  zz_recycleRequest;
}

transition({CP_O, CP_S, CP_OM}, CtoD) {
  zz_recycleRequest;
}

// Ignore replacement related messages after probe got in.
// Note: in these states the replacement request is recycled (retried
// later), not dropped.
transition({CP_I, B_I, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, {CPUWrite, NoCPUWrite}) {
  zz_recycleRequest;
}

// Ignore replacement related messages after probes processed
transition({I, I_S, I_ES, I_M, I_C, I_W}, {CPUWrite, NoCPUWrite}) {
  nC_sendNullWBAckToCore;
  i_popIncomingRequestQueue;
}
// cannot ignore cancel... otherwise TCP/SQC will be stuck in I_C
transition({I, I_S, I_ES, I_M, I_C, I_W, S_M, M, O, E, S}, CPUWriteCancel) {TagArrayRead} {
  nC_sendNullWBAckToCore;
  i_popIncomingRequestQueue;
}

transition({CP_I, B_I, CP_IOM, CP_ISM, BRWD_I, BRW_I, BRD_I}, {NoVic, VicClean, VicDirty, VicDirtyLast}) {
  nT_sendNullWBAckToTCC;
  pl_popTCCRequestQueue;
}

// Handling Probes from NB (General process: (1) propagate up, go to blocking state (2) process acks (3) on last ack downward.)

// ---- PrbInvData from a stable state ----
// step 1
transition({M, O, E, S}, PrbInvData, CP_I) {TagArrayRead} {
  tp_allocateTBE;
  dc_probeInvCoreData;
  d2_probeInvL2Data;
  pp_popProbeQueue;
}
// step 2a
transition(CP_I, CPUPrbResp) {
  y_writeDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}
// step 2b
transition(CP_I, TCCPrbResp) {
  ty_writeTCCDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}
// step 3
transition(CP_I, ProbeAcksComplete, I) {TagArrayWrite} {
  pd_sendProbeResponseData;
  c_clearOwner;
  cc_clearSharers;
  dt_deallocateTBE;
  dd_deallocateDir;
  pt_popTriggerQueue;
}

// ---- PrbInv (no data) from a stable state ----
// step 1
transition({M, O, E, S}, PrbInv, B_I) {TagArrayWrite} {
  tp_allocateTBE;
  ipc_probeInvCore;
  i2_probeInvL2;
  pp_popProbeQueue;
}
// step 2
transition(B_I, CPUPrbResp) {
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}
// step 2b
transition(B_I, TCCPrbResp) {
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}
// step 3
transition(B_I, ProbeAcksComplete, I) {TagArrayWrite} {
  // send response down to NB
  pi_sendProbeResponseInv;
  c_clearOwner;
  cc_clearSharers;
  dt_deallocateTBE;
  dd_deallocateDir;
  pt_popTriggerQueue;
}


// ---- PrbShrData from M/O/E: the line ends in O ----
// step 1
transition({M, O}, PrbShrData, CP_O) {TagArrayRead} {
  tp_allocateTBE;
  sc_probeShrCoreData;
  s2_probeShrL2Data;
  pp_popProbeQueue;
}

transition(E, PrbShrData, CP_O) {TagArrayRead} {
  tp_allocateTBE;
  eto_moveExSharerToOwner;
  sc_probeShrCoreData;
  s2_probeShrL2Data;
  pp_popProbeQueue;
}
// step 2
transition(CP_O, CPUPrbResp) {
  y_writeDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}
// step 2b
transition(CP_O, TCCPrbResp) {
  ty_writeTCCDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}
// step 3
transition(CP_O, ProbeAcksComplete, O) {TagArrayWrite} {
  // send response down to NB
  pd_sendProbeResponseData;
  dt_deallocateTBE;
  pt_popTriggerQueue;
}

// ---- PrbShrData from S: the line stays in S ----
// step 1
transition(S, PrbShrData, CP_S) {
  tp_allocateTBE;
  sc_probeShrCoreData;
  s2_probeShrL2Data;
  pp_popProbeQueue;
}
// step 2
transition(CP_S, CPUPrbResp) {
  y_writeDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}
// step 2b
transition(CP_S, TCCPrbResp) {
  ty_writeTCCDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}
// step 3
transition(CP_S, ProbeAcksComplete, S) {
  // send response down to NB
  pd_sendProbeResponseData;
  dt_deallocateTBE;
  pt_popTriggerQueue;
}

// ---- PrbInvData while upgrading from O (O_M) ----
// No TBE is allocated on this path; none of these transitions runs an
// allocate action.
// step 1
// NOTE(review): unlike the S_M -> CP_ISM PrbInvData transition below, no
// o_checkForAckCompletion runs here -- confirm that at least one ack is
// always pending on this path.
transition(O_M, PrbInvData, CP_IOM) {
  dc_probeInvCoreData;
  d2_probeInvL2Data;
  pp_popProbeQueue;
}
// step 2a
transition(CP_IOM, CPUPrbResp) {
  y_writeDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}
// step 2b
transition(CP_IOM, TCCPrbResp) {
  ty_writeTCCDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}
// step 3
// The TBE is kept across the move to I_M; only its dirty bit is cleared.
transition(CP_IOM, ProbeAcksComplete, I_M) {
  pdm_sendProbeResponseDataMs;
  c_clearOwner;
  cc_clearSharers;
  cd_clearDirtyBitTBE;
  pt_popTriggerQueue;
}

transition(CP_IOM, ProbeAcksCompleteReissue, I) {TagArrayWrite} {
  pdm_sendProbeResponseDataMs;
  c_clearOwner;
  cc_clearSharers;
  dt_deallocateTBE;
  dd_deallocateDir;
  pt_popTriggerQueue;
}

// ---- PrbInvData while upgrading from S (S_M) ----
// step 1
transition(S_M, PrbInvData, CP_ISM) {
  dc_probeInvCoreData;
  d2_probeInvL2Data;
  o_checkForAckCompletion;
  pp_popProbeQueue;
}
// step 2a
transition(CP_ISM, CPUPrbResp) {
  y_writeDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}
// step 2b
transition(CP_ISM, TCCPrbResp) {
  ty_writeTCCDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}
// step 3
transition(CP_ISM, ProbeAcksComplete, I_M) {
  pdm_sendProbeResponseDataMs;
  c_clearOwner;
  cc_clearSharers;
  cd_clearDirtyBitTBE;

  //dt_deallocateTBE;
  pt_popTriggerQueue;
}
transition(CP_ISM, ProbeAcksCompleteReissue, I) {TagArrayWrite} {
  pim_sendProbeResponseInvMs;
  c_clearOwner;
  cc_clearSharers;
  dt_deallocateTBE;
  dd_deallocateDir;
  pt_popTriggerQueue;
}

// step 1
transition({S_M, O_M}, {PrbInv}, CP_ISM) {
  dc_probeInvCoreData;
  d2_probeInvL2Data;
  pp_popProbeQueue;
}
// next steps inherited from BS_ISM
// NOTE(review): no BS_ISM state appears among these transitions; the
// remaining steps look like the CP_ISM transitions above -- confirm.

// Simpler cases

transition({I_C, I_W}, {PrbInvData, PrbInv, PrbShrData}) {
  pi_sendProbeResponseInv;
  pp_popProbeQueue;
}

// If the directory is certain that the block is not present, one can send an acknowledgement right away.
// No need for three step process.
transition(I, {PrbInv, PrbShrData, PrbInvData}) {TagArrayRead} {
  pi_sendProbeResponseInv;
  pp_popProbeQueue;
}

// Invalidating probes while a fill is outstanding: ack as invalid.
transition({I_M, I_ES, I_S}, {PrbInv, PrbInvData}) {
  pi_sendProbeResponseInv;
  pp_popProbeQueue;
}

// Shared probe while a fill is outstanding: report a miss.
transition({I_M, I_ES, I_S}, PrbShrData) {
  prm_sendProbeResponseMiss;
  pp_popProbeQueue;
}

// --- PrbShrData while upgrading from S (S_M) ---

// step 1 (also checks for ack completion right away)
transition(S_M, PrbShrData, CP_SM) {
  sc_probeShrCoreData;
  s2_probeShrL2Data;
  o_checkForAckCompletion;
  pp_popProbeQueue;
}

// step 2a
transition(CP_SM, CPUPrbResp) {
  y_writeDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

// step 2b
transition(CP_SM, TCCPrbResp) {
  ty_writeTCCDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

// step 3: send the response down to the NB and resume the upgrade
transition(CP_SM, {ProbeAcksComplete, ProbeAcksCompleteReissue},
           S_M) {DataArrayRead} {
  pd_sendProbeResponseData;
  pt_popTriggerQueue;
}

// --- PrbShrData while upgrading from O (O_M) ---

// step 1
transition(O_M, PrbShrData, CP_OM) {
  sc_probeShrCoreData;
  s2_probeShrL2Data;
  pp_popProbeQueue;
}

// step 2a
transition(CP_OM, CPUPrbResp) {
  y_writeDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

// step 2b
transition(CP_OM, TCCPrbResp) {
  ty_writeTCCDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

// step 3: send the response down to the NB and resume the upgrade
transition(CP_OM, {ProbeAcksComplete, ProbeAcksCompleteReissue}, O_M) {
  pd_sendProbeResponseData;
  pt_popTriggerQueue;
}

// --- Probes that hit a block with a victimization / recall in flight ---

transition(BRW_I, PrbInvData, I_W) {
  pd_sendProbeResponseData;
  pp_popProbeQueue;
}

transition({VM_I, VO_I}, PrbInvData, I_C) {
  pd_sendProbeResponseData;
  pp_popProbeQueue;
}

transition(VES_I, {PrbInvData, PrbInv}, I_C) {
  pi_sendProbeResponseInv;
  pp_popProbeQueue;
}

transition({VM_I, VO_I, BRW_I}, PrbInv, I_W) {
  pi_sendProbeResponseInv;
  pp_popProbeQueue;
}

// A shared probe returns data and records in the TBE that the victim was
// hit by a shared probe; the state itself does not change.
transition({VM_I, VO_I, VO_S, VES_I, BRW_I}, PrbShrData) {
  pd_sendProbeResponseData;
  sf_setSharedFlip;
  pp_popProbeQueue;
}

transition(VO_S, PrbInvData, CP_OSIW) {
  dc_probeInvCoreData;
  d2_probeInvL2Data;
  pp_popProbeQueue;
}

transition(CP_OSIW, TCCPrbResp) {
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

transition(CP_OSIW, CPUPrbResp) {
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

transition(CP_OSIW, ProbeAcksComplete, I_C) {
  pd_sendProbeResponseData;
  cd_clearDirtyBitTBE;
  pt_popTriggerQueue;
}

// --- Stale victims: always answered with a null writeback ack ---

transition({I, S, E, O, M, CP_O, CP_S, CP_OM, CP_SM, CP_OSIW,
            BW_S, BW_E, BW_O, BW_M, I_M, I_ES, I_S,
            BBS_S, BBO_O, BBM_M, BBM_O, BB_M, BB_O, BB_OO, BB_S,
            BBS_M, BBO_M, BBO_UM, BBS_UM, S_M, O_M,
            BBB_S, BBB_M, BBB_E,
            VES_I, VM_I, VO_I, VO_S, ES_I, MO_I, I_C, I_W}, StaleVic) {
  nT_sendNullWBAckToTCC;
  pl_popTCCRequestQueue;
}

transition({CP_I, B_I, CP_IOM, CP_ISM, BRWD_I, BRW_I, BRD_I}, StaleVic) {
  nT_sendNullWBAckToTCC;
  pl_popTCCRequestQueue;
}

// ---------------------------------------------------------------------------
// Recall transitions
// Transient states still require the directory state, so only stable states
// start a recall.
// ---------------------------------------------------------------------------

// Dirty-capable states: issue a dirty victim and invalidate all copies.
transition({M, O}, Recall, BRWD_I) {
  tr_allocateTBE;
  vd_victim;
  dc_probeInvCoreData;
  d2_probeInvL2Data;
}

// Clean states: issue a clean victim and invalidate all copies.
transition({E, S}, Recall, BRWD_I) {
  tr_allocateTBE;
  vc_victim;
  dc_probeInvCoreData;
  d2_probeInvL2Data;
}

// Nothing is cached above: just free the directory entry.
transition(I, Recall) {
  dd_deallocateDir;
}

// Probe acks collected while the recall is outstanding.
transition({BRWD_I, BRD_I}, CPUPrbResp) {
  y_writeDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  pk_popResponseQueue;
}

transition({BRWD_I, BRD_I}, TCCPrbResp) {
  ty_writeTCCDataToTBE;
  x_decrementAcks;
  o_checkForAckCompletion;
  plr_popTCCResponseQueue;
}

// The recall finishes once both the NB writeback ack and the last probe ack
// have arrived, in either order:
//   BRWD_I --NB_AckWB--> BRD_I --ProbeAcksComplete--> I
//   BRWD_I --ProbeAcksComplete--> BRW_I --NB_AckWB--> I
transition(BRWD_I, NB_AckWB, BRD_I) {
  pR_popResponseFromNBQueue;
}

transition(BRWD_I, ProbeAcksComplete, BRW_I) {
  pt_popTriggerQueue;
}

transition(BRW_I, NB_AckWB, I) {
  wb_data;
  dt_deallocateTBE;
  dd_deallocateDir;
  pR_popResponseFromNBQueue;
}

transition(BRD_I, ProbeAcksComplete, I) {
  wb_data;
  dt_deallocateTBE;
  dd_deallocateDir;
  pt_popTriggerQueue;
}

// A recall that lands on a transient state must wait for a stable state.
transition({BRWD_I, BRD_I, BRW_I, CP_O, CP_S, CP_OM, CP_SM, CP_OSIW,
            BW_S, BW_E, BW_O, BW_M, I_M, I_ES, I_S,
            BBS_S, BBO_O, BBM_M, BBM_O, BB_M, BB_O, BB_OO, BB_S,
            BBS_M, BBO_M, BBO_UM, BBS_UM, S_M, O_M,
            BBB_S, BBB_M, BBB_E,
            VES_I, VM_I, VO_I, VO_S, ES_I, MO_I, I_C, I_W, CP_I}, Recall) {
  zz_recycleRequest; // stall and wait would be for the wrong address
  ut_updateTag;      // try to find an easier recall
}

}
diff --git a/src/mem/protocol/GPU_RfO-TCP.sm b/src/mem/protocol/GPU_RfO-TCP.sm
new file mode 100644
index 000000000..6cf9224a6
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO-TCP.sm
@@ -0,0 +1,1009 @@
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */

// Controller for the GPU TCP (L1 data cache).  It services loads/stores from
// the mandatory queue, talks to the TCC directory over the request/response/
// unblock networks, and answers probes.
machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
 : GPUCoalescer* coalescer;
   Sequencer* sequencer;
   bool use_seq_not_coal;           // true: report completions via sequencer
   CacheMemory * L1cache;
   int TCC_select_num_bits;
   Cycles issue_latency := 40;      // time to send data down to TCC
   Cycles l2_hit_latency := 18;

   // Outgoing networks
   MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request";
   MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response";
   MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";

   // Incoming networks
   MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request";
   MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response";

   // Requests from the core
   MessageBuffer * mandatoryQueue;
{
  state_declaration(State, desc="TCP Cache States", default="TCP_State_I") {
    // Stable states
    I, AccessPermission:Invalid, desc="Invalid";
    S, AccessPermission:Read_Only, desc="Shared";
    E, AccessPermission:Read_Write, desc="Exclusive";
    O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
    M, AccessPermission:Read_Write, desc="Modified";

    // Transient states: request issued, response pending
    I_M, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
    I_ES, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
    S_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
    // Description previously duplicated S_M's "Shared"; this state is
    // entered from O.
    O_M, AccessPermission:Read_Only, desc="Owned, issued CtoD, have not seen response yet";

    // Transient states: replacement in progress
    ES_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack";
    MO_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for dirty WB ack";

    MO_PI, AccessPermission:Read_Only, desc="L1 downgrade, waiting for CtoD ack (or ProbeInvalidateData)";

    I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCC for canceled WB";
  }

  enumeration(Event, desc="TCP Events") {
    // Core initiated
    Load, desc="Load";
    Store, desc="Store";

    // TCC initiated
    TCC_AckS, desc="TCC Ack to Core Request";
    TCC_AckE, desc="TCC Ack to Core Request";
    TCC_AckM, desc="TCC Ack to Core Request";
    TCC_AckCtoD, desc="TCC Ack to Core Request";
    TCC_AckWB, desc="TCC Ack for clean WB";
    TCC_NackWB, desc="TCC Nack for clean WB";

    // Mem sys initiated
    Repl, desc="Replacing block from cache";

    // Probe Events
    PrbInvData, desc="probe, return O or M data";
    PrbInv, desc="probe, no need for data";
    LocalPrbInv, desc="local probe, no need for data";
    PrbShrData, desc="probe downgrade, return O or M data";
  }

  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
    DataArrayRead, desc="Read the data array";
    DataArrayWrite, desc="Write the data array";
    // The two tag-array descriptions previously said "data array".
    TagArrayRead, desc="Read the tag array";
    TagArrayWrite, desc="Write the tag array";
  }


  // Per-line cache entry stored in L1cache.
  structure(Entry, desc="...", interface="AbstractCacheEntry") {
    State CacheState, desc="cache state";
    bool Dirty, desc="Is the data dirty (diff than memory)?";
    DataBlock DataBlk, desc="data for the block";
    bool FromL2, default="false", desc="block just moved from L2";
  }

  // Transaction buffer entry; holds a copy of the line while it is being
  // written back.
  structure(TBE, desc="...") {
    State TBEState, desc="Transient state";
    DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
    bool Dirty, desc="Is the data dirty (different than memory)?";
    int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
    bool Shared, desc="Victim hit by shared probe";
  }

  structure(TBETable, external="yes") {
    TBE lookup(Addr);
    void allocate(Addr);
    void deallocate(Addr);
    bool isPresent(Addr);
  }

  TBETable TBEs, template="<TCP_TBE>", constructor="m_number_of_TBEs";
  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";

  // Prototypes of functions provided outside this file.
  Tick clockEdge();
  Tick cyclesToTicks(Cycles c);

  void set_cache_entry(AbstractCacheEntry b);
  void unset_cache_entry();
  void set_tbe(TBE b);
  void unset_tbe();
  void wakeUpAllBuffers();
  void wakeUpBuffers(Addr a);
  Cycles curCycle();

  // Internal functions

  // Look up the L1 entry for `address`; the result may be invalid.
  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
    Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
    return cache_entry;
  }

  // Return the data for `addr`, preferring the TBE copy over the cache copy.
  // NOTE(review): when there is no TBE the cache entry is dereferenced
  // without a validity check, so callers must ensure one of the two exists.
  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
    TBE tbe := TBEs.lookup(addr);
    if(is_valid(tbe)) {
      return tbe.DataBlk;
    } else {
      return getCacheEntry(addr).DataBlk;
    }
  }

  // State lookup order: TBE first, then cache entry, otherwise I.
  State getState(TBE tbe, Entry cache_entry, Addr addr) {
    if(is_valid(tbe)) {
      return tbe.TBEState;
    } else if (is_valid(cache_entry)) {
      return cache_entry.CacheState;
    }
    return State:I;
  }

  // Record the new state in whichever of TBE / cache entry exist.
  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
    if (is_valid(tbe)) {
      tbe.TBEState := state;
    }

    if (is_valid(cache_entry)) {
      cache_entry.CacheState := state;
    }
  }

  // Permission derived from the TBE state if present, else the cache state,
  // else NotPresent.
  AccessPermission getAccessPermission(Addr addr) {
    TBE tbe := TBEs.lookup(addr);
    if(is_valid(tbe)) {
      return TCP_State_to_permission(tbe.TBEState);
    }

    Entry cache_entry := getCacheEntry(addr);
    if(is_valid(cache_entry)) {
      return TCP_State_to_permission(cache_entry.CacheState);
    }

    return AccessPermission:NotPresent;
  }

  // True unless the line is NotPresent, Invalid or Busy.
  bool isValid(Addr addr) {
    AccessPermission perm := getAccessPermission(addr);
    if (perm == AccessPermission:NotPresent ||
        perm == AccessPermission:Invalid ||
        perm == AccessPermission:Busy) {
      return false;
    } else {
      return true;
    }
  }

  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
    if (is_valid(cache_entry)) {
      cache_entry.changePermission(TCP_State_to_permission(state));
    }
  }

  // Functional read: served from the TBE copy when one exists, otherwise
  // passed on to functionalMemoryRead.
  void functionalRead(Addr addr, Packet *pkt) {
    TBE tbe := TBEs.lookup(addr);
    if(is_valid(tbe)) {
      testAndRead(addr, tbe.DataBlk, pkt);
    } else {
      functionalMemoryRead(pkt);
    }
  }

  // Functional write: updates the TBE copy (if any) and always calls
  // functionalMemoryWrite; returns the summed write count.
  int functionalWrite(Addr addr, Packet *pkt) {
    int num_functional_writes := 0;

    TBE tbe := TBEs.lookup(addr);
    if(is_valid(tbe)) {
      num_functional_writes := num_functional_writes +
            testAndWrite(addr, tbe.DataBlk, pkt);
    }

    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
    return num_functional_writes;
  }

  // Forward a transition's resource annotation to the L1 cache statistics.
  void recordRequestType(RequestType request_type, Addr addr) {
    if (request_type == RequestType:DataArrayRead) {
      L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
    } else if (request_type == RequestType:DataArrayWrite) {
      L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
    } else if (request_type == RequestType:TagArrayRead) {
      L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
    } else if (request_type == RequestType:TagArrayWrite) {
      L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
    }
  }

  // Ask the L1 cache whether the array a transition needs is available.
  // Reads and writes of the same array map to the same resource.
  bool checkResourceAvailable(RequestType request_type, Addr addr) {
    if (request_type == RequestType:DataArrayRead) {
      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
    } else if (request_type == RequestType:DataArrayWrite) {
      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
    } else if (request_type == RequestType:TagArrayRead) {
      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
    } else if (request_type == RequestType:TagArrayWrite) {
      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
    } else {
      error("Invalid RequestType type in checkResourceAvailable");
      return true;
    }
  }

// Classify the responder of a request for the coalescer's statistics:
// this TCP itself, another TCP (reported as L1Cache_wCC), the TCC, or
// otherwise the TCC directory.
MachineType getCoherenceType(MachineID myMachID,
                             MachineID senderMachID) {
  if(myMachID == senderMachID) {
    return MachineType:TCP;
  } else if(machineIDToMachineType(senderMachID) == MachineType:TCP) {
    return MachineType:L1Cache_wCC;
  } else if(machineIDToMachineType(senderMachID) == MachineType:TCC) {
    return MachineType:TCC;
  } else {
    return MachineType:TCCdir;
  }
}

// Out Ports

out_port(requestNetwork_out, CPURequestMsg, requestFromTCP);
out_port(responseNetwork_out, ResponseMsg, responseFromTCP);
out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);

// In Ports

// Probes: translate the probe type and its flags into a probe event.
in_port(probeNetwork_in, TDProbeRequestMsg, probeToTCP) {
  if (probeNetwork_in.isReady(clockEdge())) {
    peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") {
      DPRINTF(RubySlicc, "%s\n", in_msg);
      DPRINTF(RubySlicc, "machineID: %s\n", machineID);
      Entry cache_entry := getCacheEntry(in_msg.addr);
      TBE tbe := TBEs.lookup(in_msg.addr);

      if (in_msg.Type == ProbeRequestType:PrbInv) {
        if (in_msg.ReturnData) {
          trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
        } else {
          if(in_msg.localCtoD) {
            trigger(Event:LocalPrbInv, in_msg.addr, cache_entry, tbe);
          } else {
            trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
          }
        }
      } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
        assert(in_msg.ReturnData);
        trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
      } else {
        // An unhandled probe would never be dequeued and would block the
        // probe queue; fail loudly instead.
        error("Unexpected Probe Message to Core");
      }
    }
  }
}

// Responses: map the response type (and granted state) to a TCC_* event.
in_port(responseToTCP_in, ResponseMsg, responseToTCP) {
  if (responseToTCP_in.isReady(clockEdge())) {
    peek(responseToTCP_in, ResponseMsg, block_on="addr") {

      Entry cache_entry := getCacheEntry(in_msg.addr);
      TBE tbe := TBEs.lookup(in_msg.addr);

      if (in_msg.Type == CoherenceResponseType:TDSysResp) {
        if (in_msg.State == CoherenceState:Modified) {
          if (in_msg.CtoD) {
            trigger(Event:TCC_AckCtoD, in_msg.addr, cache_entry, tbe);
          } else {
            trigger(Event:TCC_AckM, in_msg.addr, cache_entry, tbe);
          }
        } else if (in_msg.State == CoherenceState:Shared) {
          trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe);
        } else if (in_msg.State == CoherenceState:Exclusive) {
          trigger(Event:TCC_AckE, in_msg.addr, cache_entry, tbe);
        } else {
          // Any other granted state has no matching event and would leave
          // the message at the head of the queue forever.
          error("Unexpected Coherence State in TDSysResp to Core");
        }
      } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
        trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
      } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) {
        trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe);
      } else {
        error("Unexpected Response Message to Core");
      }
    }
  }
}

// Core requests: LD becomes Load, every other request type becomes Store.
// If the line is not cached and no way is free, a replacement of the
// victim chosen by the cache is triggered first.
in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
  if (mandatoryQueue_in.isReady(clockEdge())) {
    peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
      Entry cache_entry := getCacheEntry(in_msg.LineAddress);
      TBE tbe := TBEs.lookup(in_msg.LineAddress);
      DPRINTF(RubySlicc, "%s\n", in_msg);
      if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
        if (in_msg.Type == RubyRequestType:LD) {
          trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
        } else {
          trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
        }
      } else {
        Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
        trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
      }
    }
  }
}

// Actions

// Drop the line from the L1 (if present) and clear the implicit entry.
action(ic_invCache, "ic", desc="invalidate cache") {
  if(is_valid(cache_entry)) {
    L1cache.deallocate(address);
  }
  unset_cache_entry();
}

action(n_issueRdBlk, "n", desc="Issue RdBlk") {
  enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceRequestType:RdBlk;
    out_msg.Requestor := machineID;
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Request_Control;
    out_msg.InitialRequestTime := curCycle();
  }
}

action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
  enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceRequestType:RdBlkM;
    out_msg.Requestor := machineID;
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Request_Control;
    out_msg.InitialRequestTime := curCycle();
  }
}

// Announce a dirty victim; Shared is set when the line is in O.
action(vd_victim, "vd", desc="Victimize M/O Data") {
  enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Requestor := machineID;
    assert(is_valid(cache_entry));
    out_msg.DataBlk := cache_entry.DataBlk;
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Request_Control;
    out_msg.Type := CoherenceRequestType:VicDirty;
    out_msg.InitialRequestTime := curCycle();
    if (cache_entry.CacheState == State:O) {
      out_msg.Shared := true;
    } else {
      out_msg.Shared := false;
    }
    out_msg.Dirty := cache_entry.Dirty;
  }
}

// Announce a clean victim; Shared is set when the line is in S.
action(vc_victim, "vc", desc="Victimize E/S Data") {
  enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Requestor := machineID;
    // Same guard as vd_victim: the entry's state is read below.
    assert(is_valid(cache_entry));
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Request_Control;
    out_msg.Type := CoherenceRequestType:VicClean;
    out_msg.InitialRequestTime := curCycle();
    if (cache_entry.CacheState == State:S) {
      out_msg.Shared := true;
    } else {
      out_msg.Shared := false;
    }
  }
}

action(a_allocate, "a", desc="allocate block") {
  if (is_invalid(cache_entry)) {
    set_cache_entry(L1cache.allocate(address, new Entry));
  }
}

// Allocate a TBE and snapshot the line's data and dirty bit into it.
action(t_allocateTBE, "t", desc="allocate TBE Entry") {
  check_allocate(TBEs);
  assert(is_valid(cache_entry));
  TBEs.allocate(address);
  set_tbe(TBEs.lookup(address));
  tbe.DataBlk := cache_entry.DataBlk;  // Data only used for WBs
  tbe.Dirty := cache_entry.Dirty;
  tbe.Shared := false;
}

action(d_deallocateTBE, "d", desc="Deallocate TBE") {
  TBEs.deallocate(address);
  unset_tbe();
}

action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
  mandatoryQueue_in.dequeue(clockEdge());
}

action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
  responseToTCP_in.dequeue(clockEdge());
}

action(pp_popProbeQueue, "pp", desc="pop probe queue") {
  probeNetwork_in.dequeue(clockEdge());
}

// Load hit: complete through the sequencer or the coalescer, depending on
// use_seq_not_coal.
action(l_loadDone, "l", desc="local load done") {
  assert(is_valid(cache_entry));
  if (use_seq_not_coal) {
    sequencer.readCallback(address, cache_entry.DataBlk,
                           false, MachineType:TCP);
  } else {
    coalescer.readCallback(address, MachineType:TCP, cache_entry.DataBlk);
  }
}

// Load miss filled by a response: complete with the timing fields taken
// from the response message.
action(xl_loadDone, "xl", desc="remote load done") {
  peek(responseToTCP_in, ResponseMsg) {
    assert(is_valid(cache_entry));
    if (use_seq_not_coal) {
      coalescer.recordCPReadCallBack(machineID, in_msg.Sender);
      sequencer.readCallback(address,
                             cache_entry.DataBlk,
                             false,
                             machineIDToMachineType(in_msg.Sender),
                             in_msg.InitialRequestTime,
                             in_msg.ForwardRequestTime,
                             in_msg.ProbeRequestStartTime);
    } else {
      MachineType cc_mach_type := getCoherenceType(machineID,
                                                   in_msg.Sender);
      coalescer.readCallback(address,
                             cc_mach_type,
                             cache_entry.DataBlk,
                             in_msg.InitialRequestTime,
                             in_msg.ForwardRequestTime,
                             in_msg.ProbeRequestStartTime);
    }
  }
}

// Store hit: complete the store and mark the line dirty.
action(s_storeDone, "s", desc="local store done") {
  assert(is_valid(cache_entry));
  if (use_seq_not_coal) {
    coalescer.recordCPWriteCallBack(machineID, machineID);
    sequencer.writeCallback(address, cache_entry.DataBlk,
                            false, MachineType:TCP);
  } else {
    coalescer.writeCallback(address, MachineType:TCP, cache_entry.DataBlk);
  }
  cache_entry.Dirty := true;
}

// Store completed by a response: complete the store and mark the line dirty.
action(xs_storeDone, "xs", desc="remote store done") {
  peek(responseToTCP_in, ResponseMsg) {
    assert(is_valid(cache_entry));
    if (use_seq_not_coal) {
      coalescer.recordCPWriteCallBack(machineID, in_msg.Sender);
      sequencer.writeCallback(address,
                              cache_entry.DataBlk,
                              false,
                              machineIDToMachineType(in_msg.Sender),
                              in_msg.InitialRequestTime,
                              in_msg.ForwardRequestTime,
                              in_msg.ProbeRequestStartTime);
    } else {
      MachineType cc_mach_type := getCoherenceType(machineID,
                                                   in_msg.Sender);
      coalescer.writeCallback(address,
                              cc_mach_type,
                              cache_entry.DataBlk,
                              in_msg.InitialRequestTime,
                              in_msg.ForwardRequestTime,
                              in_msg.ProbeRequestStartTime);
    }
    cache_entry.Dirty := true;
  }
}

// Copy data and dirty bit from the response into the cache entry.
action(w_writeCache, "w", desc="write data to cache") {
  peek(responseToTCP_in, ResponseMsg) {
    assert(is_valid(cache_entry));
    cache_entry.DataBlk := in_msg.DataBlk;
    cache_entry.Dirty := in_msg.Dirty;
  }
}

action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
  peek(responseToTCP_in, ResponseMsg) {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:StaleNotif;
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Response_Control;
      DPRINTF(RubySlicc, "%s\n", out_msg);
    }
  }
}

// Send the TBE's copy of the line to the TCC.
action(wb_data, "wb", desc="write back data") {
  peek(responseToTCP_in, ResponseMsg) {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      // All payload fields below come from the TBE.
      assert(is_valid(tbe));
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUData;
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.DataBlk := tbe.DataBlk;
      out_msg.Dirty := tbe.Dirty;
      if (tbe.Shared) {
        out_msg.NbReqShared := true;
      } else {
        out_msg.NbReqShared := false;
      }
      out_msg.State := CoherenceState:Shared; // faux info
      out_msg.MessageSize := MessageSizeType:Writeback_Data;
      DPRINTF(RubySlicc, "%s\n", out_msg);
    }
  }
}

action(piu_sendProbeResponseInvUntransferredOwnership, "piu", desc="send probe ack inv, no data, retain ownership") {
  enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC, L3 respond in same way to probes
    out_msg.Sender := machineID;
    // will this always be ok? probably not for multisocket
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.Dirty := false;
    out_msg.Hit := false;
    out_msg.Ntsl := true;
    out_msg.State := CoherenceState:NA;
    out_msg.UntransferredOwner :=true;
    out_msg.MessageSize := MessageSizeType:Response_Control;
  }
}

action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
  enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC, L3 respond in same way to probes
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.Dirty := false;
    out_msg.Hit := false;
    out_msg.Ntsl := true;
    out_msg.State := CoherenceState:NA;
    out_msg.MessageSize := MessageSizeType:Response_Control;
    out_msg.isValid := isValid(address);
  }
}

action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
  enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and TCC respond in same way to probes
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.Dirty := false;
    out_msg.Ntsl := true;
    out_msg.Hit := false;
    out_msg.State := CoherenceState:NA;
    out_msg.MessageSize := MessageSizeType:Response_Control;
    out_msg.isValid := isValid(address);
  }
}

action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
  enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and TCC respond in same way to probes
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.Dirty := false;  // only true if sending back data i think
    out_msg.Hit := false;
    out_msg.Ntsl := false;
    out_msg.State := CoherenceState:NA;
    out_msg.MessageSize := MessageSizeType:Response_Control;
    out_msg.isValid := isValid(address);
  }
}

// Probe ack carrying data; data and dirty bit come from the TBE when one
// exists, otherwise from the cache entry.
action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
  enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
    assert(is_valid(cache_entry) || is_valid(tbe));
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp;
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.DataBlk := getDataBlock(address);
    if (is_valid(tbe)) {
      out_msg.Dirty := tbe.Dirty;
    } else {
      out_msg.Dirty := cache_entry.Dirty;
    }
    out_msg.Hit := true;
    out_msg.State := CoherenceState:NA;
    out_msg.MessageSize := MessageSizeType:Response_Data;
    out_msg.isValid := isValid(address);
    APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
    APPEND_TRANSITION_COMMENT(out_msg.Dirty);
  }
}

// Same as pd_sendProbeResponseData, but requires a valid cache entry and
// also logs the data block.
action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
  enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
    // The former "cache_entry || tbe" assert was implied by this one.
    assert(is_valid(cache_entry));
    out_msg.addr := address;
    out_msg.Type := CoherenceResponseType:CPUPrbResp;
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.DataBlk := getDataBlock(address);
    if (is_valid(tbe)) {
      out_msg.Dirty := tbe.Dirty;
    } else {
      out_msg.Dirty := cache_entry.Dirty;
    }
    out_msg.Hit := true;
    out_msg.State := CoherenceState:NA;
    out_msg.MessageSize := MessageSizeType:Response_Data;
    out_msg.isValid := isValid(address);
    APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
    APPEND_TRANSITION_COMMENT(out_msg.Dirty);
    DPRINTF(RubySlicc, "Data is %s\n", out_msg.DataBlk);
  }
}

action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
  assert(is_valid(tbe));
  tbe.Shared := true;
}

action(mru_updateMRU, "mru", desc="Touch block for replacement policy") {
  L1cache.setMRU(address);
}

action(uu_sendUnblock, "uu", desc="state changed, unblock") {
  enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
    out_msg.addr := address;
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                            TCC_select_low_bit, TCC_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Unblock_Control;
    out_msg.wasValid := isValid(address);
    DPRINTF(RubySlicc, "%s\n", out_msg);
  }
}

action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
  probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
  mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

// Transitions

// transitions from base
transition(I, Load, I_ES) {TagArrayRead} {
  a_allocate;
  n_issueRdBlk;
  p_popMandatoryQueue;
}

transition(I, Store, I_M) {TagArrayRead, TagArrayWrite} {
  a_allocate;
  nM_issueRdBlkM;
  p_popMandatoryQueue;
}

transition(S, Store, S_M) {TagArrayRead} {
  mru_updateMRU;
  nM_issueRdBlkM;
  p_popMandatoryQueue;
}

transition(E, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
  mru_updateMRU;
  s_storeDone;
  p_popMandatoryQueue;
}

transition(O, Store, O_M) {TagArrayRead, DataArrayWrite} {
  mru_updateMRU;
  nM_issueRdBlkM;
  p_popMandatoryQueue;
}

transition(M, Store) {TagArrayRead, DataArrayWrite} {
  mru_updateMRU;
  s_storeDone;
  p_popMandatoryQueue;
}

// simple hit transitions
transition({S, E, O, M}, Load) {TagArrayRead, DataArrayRead} {
  l_loadDone;
  mru_updateMRU;
  p_popMandatoryQueue;
}

// recycles from transients
transition({I_M, I_ES, ES_I, MO_I, S_M, O_M, MO_PI, I_C}, {Load, Store, Repl}) {} {
  zz_recycleMandatoryQueue;
}

// Replacements: snapshot the line into a TBE, announce the victim, then
// drop the line from the cache.
transition({S, E}, Repl, ES_I) {TagArrayRead} {
  t_allocateTBE;
  vc_victim;
  ic_invCache;
}

transition({O, M}, Repl, MO_I) {TagArrayRead, DataArrayRead} {
  t_allocateTBE;
  vd_victim;
  ic_invCache;
}

// TD event transitions
transition(I_M, {TCC_AckM, TCC_AckCtoD}, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
  w_writeCache;
  xs_storeDone;
  uu_sendUnblock;
  pr_popResponseQueue;
}

transition(I_ES, TCC_AckS, S) {TagArrayWrite, DataArrayWrite} {
  w_writeCache;
  xl_loadDone;
  uu_sendUnblock;
  pr_popResponseQueue;
}

transition(I_ES, TCC_AckE, E) {TagArrayWrite, DataArrayWrite} {
  w_writeCache;
  xl_loadDone;
  uu_sendUnblock;
  pr_popResponseQueue;
}

transition({S_M, O_M}, TCC_AckM, M) {TagArrayWrite, DataArrayWrite} {
  xs_storeDone;
  uu_sendUnblock;
  pr_popResponseQueue;
}

// Writeback refused: nothing to send, just release the TBE.
transition({MO_I, ES_I}, TCC_NackWB, I) {TagArrayWrite} {
  d_deallocateTBE;
  pr_popResponseQueue;
}

// Writeback accepted: send the data held in the TBE.
transition({MO_I, ES_I}, TCC_AckWB, I) {TagArrayWrite, DataArrayRead} {
  wb_data;
  d_deallocateTBE;
  pr_popResponseQueue;
}

// Writeback was canceled: tell the TCC there is nothing to write back.
transition(I_C, TCC_AckWB, I) {TagArrayWrite} {
  ss_sendStaleNotification;
  d_deallocateTBE;
  pr_popResponseQueue;
}

transition(I_C, TCC_NackWB, I) {TagArrayWrite} {
  d_deallocateTBE;
  pr_popResponseQueue;
}

// Probe transitions
transition({M, O}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
  pd_sendProbeResponseData;
  ic_invCache;
  pp_popProbeQueue;
}

transition(I, PrbInvData) {TagArrayRead, TagArrayWrite} {
  prm_sendProbeResponseMiss;
  pp_popProbeQueue;
}

transition({E, S}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
  pd_sendProbeResponseData;
  ic_invCache;
  pp_popProbeQueue;
}

transition(I_C, PrbInvData, I_C) {} {
  pi_sendProbeResponseInv;
  ic_invCache;
  pp_popProbeQueue;
}

// Needed for TCC-based protocols. Must hold on to ownership till transfer complete
transition({M, O}, LocalPrbInv, MO_PI) {TagArrayRead, TagArrayWrite} {
  piu_sendProbeResponseInvUntransferredOwnership;
  pp_popProbeQueue;
}

// If there is a race and we see a probe invalidate, handle normally.
transition(MO_PI, PrbInvData, I) {TagArrayWrite} {
  pd_sendProbeResponseData;
  ic_invCache;
  pp_popProbeQueue;
}

transition(MO_PI, PrbInv, I) {TagArrayWrite} {
  pi_sendProbeResponseInv;
  ic_invCache;
  pp_popProbeQueue;
}

// normal exit when ownership is successfully transferred
transition(MO_PI, TCC_AckCtoD, I) {TagArrayWrite} {
  ic_invCache;
  pr_popResponseQueue;
}

transition({M, O, E, S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
  pi_sendProbeResponseInv;
  ic_invCache;
  pp_popProbeQueue;
}

transition({E, S, I}, LocalPrbInv, I) {TagArrayRead, TagArrayWrite} {
  pi_sendProbeResponseInv;
  ic_invCache;
  pp_popProbeQueue;
}


// Shared (downgrade) probes: return data; M/E/O end up in O.
transition({M, E, O}, PrbShrData, O) {TagArrayRead, TagArrayWrite, DataArrayRead} {
  pd_sendProbeResponseData;
  pp_popProbeQueue;
}

transition(MO_PI, PrbShrData) {DataArrayRead} {
  pd_sendProbeResponseData;
  pp_popProbeQueue;
}


transition(S, PrbShrData, S) {TagArrayRead, DataArrayRead} {
  pd_sendProbeResponseData;
  pp_popProbeQueue;
}

transition({I, I_C}, PrbShrData) {TagArrayRead} {
  prm_sendProbeResponseMiss;
  pp_popProbeQueue;
}

+ transition(I_C, PrbInv, I_C) {} { + pi_sendProbeResponseInv; + ic_invCache; + pp_popProbeQueue; + } + + transition({I_M, I_ES}, {PrbInv, PrbInvData}){TagArrayRead} { + pi_sendProbeResponseInv; + ic_invCache; + a_allocate; // but make sure there is room for incoming data when it arrives + pp_popProbeQueue; + } + + transition({I_M, I_ES}, PrbShrData) {} { + prm_sendProbeResponseMiss; + pp_popProbeQueue; + } + + transition(S_M, PrbInvData, I_M) {TagArrayRead} { + pim_sendProbeResponseInvMs; + ic_invCache; + a_allocate; + pp_popProbeQueue; + } + + transition(O_M, PrbInvData, I_M) {TagArrayRead,DataArrayRead} { + pdm_sendProbeResponseDataMs; + ic_invCache; + a_allocate; + pp_popProbeQueue; + } + + transition({S_M, O_M}, {PrbInv}, I_M) {TagArrayRead} { + pim_sendProbeResponseInvMs; + ic_invCache; + a_allocate; + pp_popProbeQueue; + } + + transition(S_M, {LocalPrbInv}, I_M) {TagArrayRead} { + pim_sendProbeResponseInvMs; + ic_invCache; + a_allocate; + pp_popProbeQueue; + } + + transition(O_M, LocalPrbInv, I_M) {TagArrayRead} { + piu_sendProbeResponseInvUntransferredOwnership; + ic_invCache; + a_allocate; + pp_popProbeQueue; + } + + transition({S_M, O_M}, PrbShrData) {DataArrayRead} { + pd_sendProbeResponseData; + pp_popProbeQueue; + } + + transition(ES_I, PrbInvData, I_C){ + pd_sendProbeResponseData; + ic_invCache; + pp_popProbeQueue; + } + + transition(MO_I, PrbInvData, I_C) {DataArrayRead} { + pd_sendProbeResponseData; + ic_invCache; + pp_popProbeQueue; + } + + transition(MO_I, PrbInv, I_C) { + pi_sendProbeResponseInv; + ic_invCache; + pp_popProbeQueue; + } + + transition(ES_I, PrbInv, I_C) { + pi_sendProbeResponseInv; + ic_invCache; + pp_popProbeQueue; + } + + transition(ES_I, PrbShrData, ES_I) {DataArrayRead} { + pd_sendProbeResponseData; + sf_setSharedFlip; + pp_popProbeQueue; + } + + transition(MO_I, PrbShrData, MO_I) {DataArrayRead} { + pd_sendProbeResponseData; + sf_setSharedFlip; + pp_popProbeQueue; + } + +} diff --git a/src/mem/protocol/GPU_RfO.slicc 
b/src/mem/protocol/GPU_RfO.slicc new file mode 100644 index 000000000..7773ce6e0 --- /dev/null +++ b/src/mem/protocol/GPU_RfO.slicc @@ -0,0 +1,11 @@ +protocol "GPU_AMD_Base"; +include "RubySlicc_interfaces.slicc"; +include "MOESI_AMD_Base-msg.sm"; +include "MOESI_AMD_Base-dir.sm"; +include "MOESI_AMD_Base-CorePair.sm"; +include "GPU_RfO-TCP.sm"; +include "GPU_RfO-SQC.sm"; +include "GPU_RfO-TCC.sm"; +include "GPU_RfO-TCCdir.sm"; +include "MOESI_AMD_Base-L3cache.sm"; +include "MOESI_AMD_Base-RegionBuffer.sm"; diff --git a/src/mem/protocol/GPU_VIPER-SQC.sm b/src/mem/protocol/GPU_VIPER-SQC.sm new file mode 100644 index 000000000..8d5b5699a --- /dev/null +++ b/src/mem/protocol/GPU_VIPER-SQC.sm @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Blake Hechtman + */ + +machine(MachineType:SQC, "GPU SQC (L1 I Cache)") + : Sequencer* sequencer; + CacheMemory * L1cache; + int TCC_select_num_bits; + Cycles issue_latency := 80; // time to send data down to TCC + Cycles l2_hit_latency := 18; // for 1MB L2, 20 for 2MB + + MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request"; + + MessageBuffer * probeToSQC, network="From", virtual_network="1", vnet_type="request"; + MessageBuffer * responseToSQC, network="From", virtual_network="3", vnet_type="response"; + + MessageBuffer * mandatoryQueue; +{ + state_declaration(State, desc="SQC Cache States", default="SQC_State_I") { + I, AccessPermission:Invalid, desc="Invalid"; + V, AccessPermission:Read_Only, desc="Valid"; + } + + enumeration(Event, desc="SQC Events") { + // Core initiated + Fetch, desc="Fetch"; + // Mem sys initiated + Repl, desc="Replacing block from cache"; + Data, desc="Received Data"; + } + + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { /* one value per L1cache array access kind; consumed by recordRequestType() and checkResourceAvailable() */ + DataArrayRead, desc="Read the data array"; + DataArrayWrite, desc="Write the data array"; + TagArrayRead, desc="Read the tag array"; + TagArrayWrite, desc="Write the tag array"; + } + + + structure(Entry, desc="...", interface="AbstractCacheEntry") { + State CacheState, desc="cache state"; + bool Dirty, desc="Is the data dirty (diff than memory)?"; + DataBlock DataBlk, desc="data for the block"; 
+ bool FromL2, default="false", desc="block just moved from L2"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block, required for concurrent writebacks"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for"; + bool Shared, desc="Victim hit by shared probe"; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + TBETable TBEs, template="<SQC_TBE>", constructor="m_number_of_TBEs"; + int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + + void set_cache_entry(AbstractCacheEntry b); + void unset_cache_entry(); + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + Cycles curCycle(); + + // Internal functions + Tick clockEdge(); + + Entry getCacheEntry(Addr address), return_by_pointer="yes" { + Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address)); + return cache_entry; + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + return tbe.DataBlk; + } else { + return getCacheEntry(addr).DataBlk; + } + } + + State getState(TBE tbe, Entry cache_entry, Addr addr) { + if(is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:I; + } + + void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState := state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes 
:= 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + AccessPermission getAccessPermission(Addr addr) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + return SQC_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + return SQC_State_to_permission(cache_entry.CacheState); + } + + return AccessPermission:NotPresent; + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(SQC_State_to_permission(state)); + } + } + + void recordRequestType(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:DataArrayWrite) { + L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:TagArrayRead) { + L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:TagArrayWrite) { + L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:DataArrayWrite) { + return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:TagArrayRead) { + return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:TagArrayWrite) { + return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType 
type in checkResourceAvailable"); + return true; + } + } + + // Out Ports + + out_port(requestNetwork_out, CPURequestMsg, requestFromSQC); + + // In Ports + + in_port(responseToSQC_in, ResponseMsg, responseToSQC) { + if (responseToSQC_in.isReady(clockEdge())) { + peek(responseToSQC_in, ResponseMsg, block_on="addr") { + + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + + if (in_msg.Type == CoherenceResponseType:TDSysResp) { + if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) { + trigger(Event:Data, in_msg.addr, cache_entry, tbe); + } else { + Addr victim := L1cache.cacheProbe(in_msg.addr); + trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } else { + error("Unexpected Response Message to Core"); + } + } + } + } + + in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") { + if (mandatoryQueue_in.isReady(clockEdge())) { + peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { + Entry cache_entry := getCacheEntry(in_msg.LineAddress); + TBE tbe := TBEs.lookup(in_msg.LineAddress); + + assert(in_msg.Type == RubyRequestType:IFETCH); + trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe); + } + } + } + + // Actions + + action(ic_invCache, "ic", desc="invalidate cache") { + if(is_valid(cache_entry)) { + L1cache.deallocate(address); + } + unset_cache_entry(); + } + + action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlk; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := curCycle(); + } + } + + action(a_allocate, "a", desc="allocate block") { + if (is_invalid(cache_entry)) { + set_cache_entry(L1cache.allocate(address, new Entry)); + } + } + + 
action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") { + mandatoryQueue_in.dequeue(clockEdge()); + } + + action(pr_popResponseQueue, "pr", desc="Pop Response Queue") { + responseToSQC_in.dequeue(clockEdge()); + } + + action(l_loadDone, "l", desc="local load done") { + assert(is_valid(cache_entry)); + sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache); + APPEND_TRANSITION_COMMENT(cache_entry.DataBlk); + } + + action(w_writeCache, "w", desc="write data to cache") { + peek(responseToSQC_in, ResponseMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := false; + } + } + + // Transitions + + // transitions from base + transition({I, V}, Repl, I) {TagArrayRead, TagArrayWrite} { /* only drops the victim line; no queue is popped here, so the triggering response stays queued */ + ic_invCache; + } + + transition(I, Data, V) {TagArrayRead, TagArrayWrite, DataArrayRead} { + a_allocate; + w_writeCache; + l_loadDone; + pr_popResponseQueue; + } + + transition(I, Fetch) {TagArrayRead, TagArrayWrite} { + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + // simple hit transitions + transition(V, Fetch) {TagArrayRead, DataArrayRead} { + l_loadDone; + p_popMandatoryQueue; + } +} diff --git a/src/mem/protocol/GPU_VIPER-TCC.sm b/src/mem/protocol/GPU_VIPER-TCC.sm new file mode 100644 index 000000000..f62df9f4f --- /dev/null +++ b/src/mem/protocol/GPU_VIPER-TCC.sm @@ -0,0 +1,739 @@ +/* + * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Blake Hechtman + */ + +machine(MachineType:TCC, "TCC Cache") + : CacheMemory * L2cache; + bool WB; /*is this cache Writeback?*/ + Cycles l2_request_latency := 50; + Cycles l2_response_latency := 20; + + // From the TCPs or SQCs + MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request"; + // To the Cores. TCC deals only with TCPs/SQCs. 
+ MessageBuffer * responseToCore, network="To", virtual_network="3", vnet_type="response"; + // From the NB + MessageBuffer * probeFromNB, network="From", virtual_network="0", vnet_type="request"; + MessageBuffer * responseFromNB, network="From", virtual_network="2", vnet_type="response"; + // To the NB + MessageBuffer * requestToNB, network="To", virtual_network="0", vnet_type="request"; + MessageBuffer * responseToNB, network="To", virtual_network="2", vnet_type="response"; + MessageBuffer * unblockToNB, network="To", virtual_network="4", vnet_type="unblock"; + + MessageBuffer * triggerQueue; + +{ + // EVENTS + enumeration(Event, desc="TCC Events") { + // Requests coming from the Cores + RdBlk, desc="RdBlk event"; + WrVicBlk, desc="L1 Write Through"; + WrVicBlkBack, desc="L1 Write Through(dirty cache)"; + Atomic, desc="Atomic Op"; + AtomicDone, desc="AtomicOps Complete"; + AtomicNotDone, desc="AtomicOps not Complete"; + Data, desc="data message"; + // Coming from this TCC + L2_Repl, desc="L2 Replacement"; + // Probes + PrbInv, desc="Invalidating probe"; + // Coming from Memory Controller + WBAck, desc="writethrough ack from memory"; + } + + // STATES + state_declaration(State, desc="TCC State", default="TCC_State_I") { + M, AccessPermission:Read_Write, desc="Modified(dirty cache only)"; + W, AccessPermission:Read_Write, desc="Written(dirty cache only)"; + V, AccessPermission:Read_Only, desc="Valid"; + I, AccessPermission:Invalid, desc="Invalid"; + IV, AccessPermission:Busy, desc="Waiting for Data"; + WI, AccessPermission:Busy, desc="Waiting on Writethrough Ack"; + A, AccessPermission:Busy, desc="Invalid waiting on atomic Data"; + } + + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { /* one value per L2cache array access kind; consumed by recordRequestType() and checkResourceAvailable() */ + DataArrayRead, desc="Read the data array"; + DataArrayWrite, desc="Write the data array"; + TagArrayRead, desc="Read the tag array"; + TagArrayWrite, desc="Write the tag array"; + } + + + // STRUCTURES + + structure(Entry, desc="...", 
interface="AbstractCacheEntry") { + State CacheState, desc="cache state"; + bool Dirty, desc="Is the data dirty (diff from memory?)"; + DataBlock DataBlk, desc="Data for the block"; + WriteMask writeMask, desc="Dirty byte mask"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block"; + bool Dirty, desc="Is the data dirty?"; + bool Shared, desc="Victim hit by shared probe"; + MachineID From, desc="Waiting for writeback from..."; + NetDest Destination, desc="Data destination"; + int numAtomics, desc="number remaining atomics"; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs"; + + void set_cache_entry(AbstractCacheEntry b); + void unset_cache_entry(); + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + + + // FUNCTION DEFINITIONS + Tick clockEdge(); + + Entry getCacheEntry(Addr addr), return_by_pointer="yes" { + return static_cast(Entry, "pointer", L2cache.lookup(addr)); + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + return getCacheEntry(addr).DataBlk; + } + + bool presentOrAvail(Addr addr) { + return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr); + } + + State getState(TBE tbe, Entry cache_entry, Addr addr) { + if (is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:I; + } + + void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState := state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int 
functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + AccessPermission getAccessPermission(Addr addr) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + return TCC_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + return TCC_State_to_permission(cache_entry.CacheState); + } + + return AccessPermission:NotPresent; + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(TCC_State_to_permission(state)); + } + } + + void recordRequestType(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:DataArrayWrite) { + L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:TagArrayRead) { + L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:TagArrayWrite) { + L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:DataArrayWrite) { + return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:TagArrayRead) { + return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:TagArrayWrite) { + return 
L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } + } + + + // ** OUT_PORTS ** + + // Three classes of ports + // Class 1: downward facing network links to NB + out_port(requestToNB_out, CPURequestMsg, requestToNB); + out_port(responseToNB_out, ResponseMsg, responseToNB); + out_port(unblockToNB_out, UnblockMsg, unblockToNB); + + // Class 2: upward facing ports to GPU cores + out_port(responseToCore_out, ResponseMsg, responseToCore); + + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + // + // request queue going to NB + // + + +// ** IN_PORTS ** + in_port(triggerQueue_in, TriggerMsg, triggerQueue) { /* raises AtomicDone when the TBE has no atomics outstanding, AtomicNotDone otherwise */ + if (triggerQueue_in.isReady(clockEdge())) { + peek(triggerQueue_in, TriggerMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (tbe.numAtomics == 0) { + trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe); + } + } + } + } + + + + in_port(responseFromNB_in, ResponseMsg, responseFromNB) { + if (responseFromNB_in.isReady(clockEdge())) { + peek(responseFromNB_in, ResponseMsg, block_on="addr") { + TBE tbe := TBEs.lookup(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (in_msg.Type == CoherenceResponseType:NBSysResp) { + if(presentOrAvail(in_msg.addr)) { + trigger(Event:Data, in_msg.addr, cache_entry, tbe); + } else { + Addr victim := L2cache.cacheProbe(in_msg.addr); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) { + trigger(Event:WBAck, in_msg.addr, cache_entry, tbe); + } else { + error("Unexpected Response Message to Core"); + } + } + } + } + + // Finally handling incoming requests (from TCP) and probes (from NB). 
+ in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB) { + if (probeNetwork_in.isReady(clockEdge())) { + peek(probeNetwork_in, NBProbeRequestMsg) { + DPRINTF(RubySlicc, "%s\n", in_msg); + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); + } + } + } + + in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) { + if (coreRequestNetwork_in.isReady(clockEdge())) { + peek(coreRequestNetwork_in, CPURequestMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + if(WB) { + if(presentOrAvail(in_msg.addr)) { + trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe); + } else { + Addr victim := L2cache.cacheProbe(in_msg.addr); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } else { + trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:Atomic) { + trigger(Event:Atomic, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlk) { + trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe); + } else { /* only WriteThrough, Atomic and RdBlk requests are handled on this port */ + DPRINTF(RubySlicc, "%s\n", in_msg); + error("Unexpected Request Message from Core"); + } + } + } + } + // BEGIN ACTIONS + + action(i_invL2, "i", desc="invalidate TCC cache block") { + if (is_valid(cache_entry)) { + L2cache.deallocate(address); + } + unset_cache_entry(); + } + + action(sd_sendData, "sd", desc="send Shared response") { + peek(coreRequestNetwork_in, CPURequestMsg) { + enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:TDSysResp; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := false; + out_msg.State := 
CoherenceState:Shared; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + + action(sdr_sendDataResponse, "sdr", desc="send Shared response") { + enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:TDSysResp; + out_msg.Sender := machineID; + out_msg.Destination := tbe.Destination; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := false; + out_msg.State := CoherenceState:Shared; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + enqueue(unblockToNB_out, UnblockMsg, 1) { + out_msg.addr := address; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Unblock_Control; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + + action(rd_requestData, "r", desc="Miss in L2, pass on") { + if(tbe.Destination.count()==1){ + peek(coreRequestNetwork_in, CPURequestMsg) { + enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Type := in_msg.Type; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Shared := false; // unneeded for this request + out_msg.MessageSize := in_msg.MessageSize; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + } + + action(w_sendResponseWBAck, "w", desc="send WB Ack") { + peek(responseFromNB_in, ResponseMsg) { + enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:TDSysWBAck; + out_msg.Destination.clear(); + out_msg.Destination.add(in_msg.WTRequestor); + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(swb_sendWBAck, "swb", desc="send WB Ack") { + peek(coreRequestNetwork_in, CPURequestMsg) { + enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { + out_msg.addr := address; + out_msg.Type := 
CoherenceResponseType:TDSysWBAck; + out_msg.Destination.clear(); + out_msg.Destination.add(in_msg.Requestor); + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") { + peek(responseFromNB_in, ResponseMsg) { + enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:TDSysResp; + out_msg.Destination.add(in_msg.WTRequestor); + out_msg.Sender := machineID; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := in_msg.DataBlk; + } + } + } + + action(a_allocateBlock, "a", desc="allocate TCC block") { + if (is_invalid(cache_entry)) { + set_cache_entry(L2cache.allocate(address, new Entry)); + cache_entry.writeMask.clear(); + } + } + + action(t_allocateTBE, "t", desc="allocate TBE Entry") { + if (is_invalid(tbe)) { + check_allocate(TBEs); + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + tbe.Destination.clear(); + tbe.numAtomics := 0; + } + if (coreRequestNetwork_in.isReady(clockEdge())) { + peek(coreRequestNetwork_in, CPURequestMsg) { + if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){ + tbe.Destination.add(in_msg.Requestor); + } + } + } + } + + action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") { + tbe.Destination.clear(); + TBEs.deallocate(address); + unset_tbe(); + } + + action(wcb_writeCacheBlock, "wcb", desc="write data to TCC") { + peek(responseFromNB_in, ResponseMsg) { + cache_entry.DataBlk := in_msg.DataBlk; + DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg); + } + } + + action(wdb_writeDirtyBytes, "wdb", desc="write data to TCC") { + peek(coreRequestNetwork_in, CPURequestMsg) { + cache_entry.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask); + cache_entry.writeMask.orMask(in_msg.writeMask); + DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg); + } + } + + action(wt_writeThrough, "wt", desc="write back 
data") { + peek(coreRequestNetwork_in, CPURequestMsg) { + enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + out_msg.WTRequestor := in_msg.Requestor; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Data; + out_msg.Type := CoherenceRequestType:WriteThrough; + out_msg.Dirty := true; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.writeMask.orMask(in_msg.writeMask); + } + } + } + + action(wb_writeBack, "wb", desc="write back data") { + enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + out_msg.WTRequestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Data; + out_msg.Type := CoherenceRequestType:WriteThrough; + out_msg.Dirty := true; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.writeMask.orMask(cache_entry.writeMask); + } + } + + action(at_atomicThrough, "at", desc="write back data") { + peek(coreRequestNetwork_in, CPURequestMsg) { + enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + out_msg.WTRequestor := in_msg.Requestor; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Data; + out_msg.Type := CoherenceRequestType:Atomic; + out_msg.Dirty := true; + out_msg.writeMask.orMask(in_msg.writeMask); + } + } + } + + action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") { + enqueue(responseToNB_out, ResponseMsg, 1) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Dirty := false; + out_msg.Hit := false; + out_msg.Ntsl := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize 
:= MessageSizeType:Response_Control; + } + } + action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") { + L2cache.setMRU(address); + } + + action(p_popRequestQueue, "p", desc="pop request queue") { + coreRequestNetwork_in.dequeue(clockEdge()); + } + + action(pr_popResponseQueue, "pr", desc="pop response queue") { + responseFromNB_in.dequeue(clockEdge()); + } + + action(pp_popProbeQueue, "pp", desc="pop probe queue") { + probeNetwork_in.dequeue(clockEdge()); + } + + action(z_stall, "z", desc="stall") { + // built-in + } + + + action(ina_incrementNumAtomics, "ina", desc="inc num atomics") { + tbe.numAtomics := tbe.numAtomics + 1; + } + + + // Drops one outstanding atomic from the TBE count; when the count reaches zero, + // enqueues an AtomicDone trigger for this address on triggerQueue_out. + action(dna_decrementNumAtomics, "dna", desc="dec num atomics") { + tbe.numAtomics := tbe.numAtomics - 1; + if (tbe.numAtomics==0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AtomicDone; + } + } + } + + action(ptr_popTriggerQueue, "ptr", desc="pop Trigger") { + triggerQueue_in.dequeue(clockEdge()); + } + + // END ACTIONS + + // BEGIN TRANSITIONS + // transitions from base + // Assumptions for ArrayRead/Write + // TBE checked before tags + // Data Read/Write requires Tag Read + + // Stalling transitions do NOT check the tag array...and if they do, + // they can cause a resource stall deadlock! 
+ + transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} { + z_stall; + } + transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) { //TagArrayRead} { + z_stall; + } + transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} { + z_stall; + } + transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} { + sd_sendData; + ut_updateTag; + p_popRequestQueue; + } + transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} { + t_allocateTBE; + wb_writeBack; + } + + transition(I, RdBlk, IV) {TagArrayRead} { + t_allocateTBE; + rd_requestData; + p_popRequestQueue; + } + + transition(IV, RdBlk) { + t_allocateTBE; + rd_requestData; + p_popRequestQueue; + } + + transition({V, I},Atomic, A) {TagArrayRead} { + i_invL2; + t_allocateTBE; + at_atomicThrough; + ina_incrementNumAtomics; + p_popRequestQueue; + } + + transition(A, Atomic) { + at_atomicThrough; + ina_incrementNumAtomics; + p_popRequestQueue; + } + + transition({M, W}, Atomic, WI) {TagArrayRead} { + t_allocateTBE; + wb_writeBack; + } + + transition(I, WrVicBlk) {TagArrayRead} { + wt_writeThrough; + p_popRequestQueue; + } + + transition(V, WrVicBlk) {TagArrayRead, DataArrayWrite} { + ut_updateTag; + wdb_writeDirtyBytes; + wt_writeThrough; + p_popRequestQueue; + } + + transition({V, M}, WrVicBlkBack, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + ut_updateTag; + swb_sendWBAck; + wdb_writeDirtyBytes; + p_popRequestQueue; + } + + transition(W, WrVicBlkBack) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + ut_updateTag; + swb_sendWBAck; + wdb_writeDirtyBytes; + p_popRequestQueue; + } + + transition(I, WrVicBlkBack, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + a_allocateBlock; + ut_updateTag; + swb_sendWBAck; + wdb_writeDirtyBytes; + p_popRequestQueue; + } + + transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} { + t_allocateTBE; + wb_writeBack; + i_invL2; + } + + transition({I, V}, L2_Repl, I) {TagArrayRead, TagArrayWrite} { + i_invL2; + } + + transition({A, IV, WI}, L2_Repl) { + 
i_invL2; + } + + transition({I, V}, PrbInv, I) {TagArrayRead, TagArrayWrite} { + pi_sendProbeResponseInv; + pp_popProbeQueue; + } + + transition(M, PrbInv, W) {TagArrayRead, TagArrayWrite} { + pi_sendProbeResponseInv; + pp_popProbeQueue; + } + + transition(W, PrbInv) {TagArrayRead} { + pi_sendProbeResponseInv; + pp_popProbeQueue; + } + + transition({A, IV, WI}, PrbInv) { + pi_sendProbeResponseInv; + pp_popProbeQueue; + } + + transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + a_allocateBlock; + ut_updateTag; + wcb_writeCacheBlock; + sdr_sendDataResponse; + pr_popResponseQueue; + dt_deallocateTBE; + } + + transition(A, Data) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + a_allocateBlock; + ar_sendAtomicResponse; + dna_decrementNumAtomics; + pr_popResponseQueue; + } + + transition(A, AtomicDone, I) {TagArrayRead, TagArrayWrite} { + dt_deallocateTBE; + ptr_popTriggerQueue; + } + + transition(A, AtomicNotDone) {TagArrayRead} { + ptr_popTriggerQueue; + } + + //M,W should not see WBAck as the cache is in WB mode + //WBAcks do not need to check tags + transition({I, V, IV, A}, WBAck) { + w_sendResponseWBAck; + pr_popResponseQueue; + } + + transition(WI, WBAck,I) { + dt_deallocateTBE; + pr_popResponseQueue; + } +} diff --git a/src/mem/protocol/GPU_VIPER-TCP.sm b/src/mem/protocol/GPU_VIPER-TCP.sm new file mode 100644 index 000000000..d81196b17 --- /dev/null +++ b/src/mem/protocol/GPU_VIPER-TCP.sm @@ -0,0 +1,747 @@ +/* + * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Blake Hechtman + */ + +machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") + : VIPERCoalescer* coalescer; + Sequencer* sequencer; + bool use_seq_not_coal; + CacheMemory * L1cache; + bool WB; /*is this cache Writeback?*/ + bool disableL1; /* bypass L1 cache? 
*/ + int TCC_select_num_bits; + Cycles issue_latency := 40; // time to send data down to TCC + Cycles l2_hit_latency := 18; + + MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request"; + MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response"; + MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock"; + + MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request"; + MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response"; + MessageBuffer * mandatoryQueue; + +{ + state_declaration(State, desc="TCP Cache States", default="TCP_State_I") { + I, AccessPermission:Invalid, desc="Invalid"; + V, AccessPermission:Read_Only, desc="Valid"; + W, AccessPermission:Read_Write, desc="Written"; + M, AccessPermission:Read_Write, desc="Written and Valid"; + L, AccessPermission:Read_Write, desc="Local access is modifable"; + A, AccessPermission:Invalid, desc="Waiting on Atomic"; + } + + enumeration(Event, desc="TCP Events") { + // Core initiated + Load, desc="Load"; + Store, desc="Store to L1 (L1 is dirty)"; + StoreThrough, desc="Store directly to L2(L1 is clean)"; + StoreLocal, desc="Store to L1 but L1 is clean"; + Atomic, desc="Atomic"; + Flush, desc="Flush if dirty(wbL1 for Store Release)"; + Evict, desc="Evict if clean(invL1 for Load Acquire)"; + // Mem sys initiated + Repl, desc="Replacing block from cache"; + + // TCC initiated + TCC_Ack, desc="TCC Ack to Core Request"; + TCC_AckWB, desc="TCC Ack for WB"; + // Disable L1 cache + Bypass, desc="Bypass the entire L1 cache"; + } + + // Resource tags listed on transitions: Data* entries name data-array accesses, + // Tag* entries name tag-array accesses. + enumeration(RequestType, + desc="To communicate stats from transitions to recordStats") { + DataArrayRead, desc="Read the data array"; + DataArrayWrite, desc="Write the data array"; + TagArrayRead, desc="Read the tag array"; + TagArrayWrite, desc="Write the tag array"; + TagArrayFlash, desc="Flash clear the tag array"; + } + + + structure(Entry, 
desc="...", interface="AbstractCacheEntry") { + State CacheState, desc="cache state"; + bool Dirty, desc="Is the data dirty (diff than memory)?"; + DataBlock DataBlk, desc="data for the block"; + bool FromL2, default="false", desc="block just moved from L2"; + WriteMask writeMask, desc="written bytes masks"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block, required for concurrent writebacks"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + int NumPendingMsgs,desc="Number of acks/data messages that this processor is waiting for"; + bool Shared, desc="Victim hit by shared probe"; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + TBETable TBEs, template="<TCP_TBE>", constructor="m_number_of_TBEs"; + int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int WTcnt, default="0"; + int Fcnt, default="0"; + bool inFlush, default="false"; + + void set_cache_entry(AbstractCacheEntry b); + void unset_cache_entry(); + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + Cycles curCycle(); + + // Internal functions + Tick clockEdge(); + Tick cyclesToTicks(Cycles c); + Entry getCacheEntry(Addr address), return_by_pointer="yes" { + Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address)); + return cache_entry; + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + return tbe.DataBlk; + } else { + return getCacheEntry(addr).DataBlk; + } + } + + State getState(TBE tbe, Entry cache_entry, Addr addr) { + if (is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:I; + } + + void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState 
:= state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + AccessPermission getAccessPermission(Addr addr) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + return TCP_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + return TCP_State_to_permission(cache_entry.CacheState); + } + + return AccessPermission:NotPresent; + } + + bool isValid(Addr addr) { + AccessPermission perm := getAccessPermission(addr); + if (perm == AccessPermission:NotPresent || + perm == AccessPermission:Invalid || + perm == AccessPermission:Busy) { + return false; + } else { + return true; + } + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(TCP_State_to_permission(state)); + } + } + + void recordRequestType(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:DataArrayWrite) { + L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:TagArrayRead) { + L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:TagArrayFlash) { + L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == 
RequestType:TagArrayWrite) { + L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:DataArrayWrite) { + return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:TagArrayRead) { + return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:TagArrayWrite) { + return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:TagArrayFlash) { + // FIXME should check once per cache, rather than once per cacheline + return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } + } + + // Out Ports + + out_port(requestNetwork_out, CPURequestMsg, requestFromTCP); + + // In Ports + + in_port(responseToTCP_in, ResponseMsg, responseToTCP) { + if (responseToTCP_in.isReady(clockEdge())) { + peek(responseToTCP_in, ResponseMsg, block_on="addr") { + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + if (in_msg.Type == CoherenceResponseType:TDSysResp) { + // disable L1 cache + if (disableL1) { + trigger(Event:Bypass, in_msg.addr, cache_entry, tbe); + } else { + if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) { + trigger(Event:TCC_Ack, in_msg.addr, cache_entry, tbe); + } else { + Addr victim := L1cache.cacheProbe(in_msg.addr); + trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } + } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck || + in_msg.Type == CoherenceResponseType:NBSysWBAck) { + trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe); + } else { + error("Unexpected 
Response Message to Core"); + } + } + } + } + + // Dispatches the request at the head of the mandatory queue to a TCP event, + // keyed on the request type; stores additionally pick Repl when no way is free. + in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") { + if (mandatoryQueue_in.isReady(clockEdge())) { + peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { + Entry cache_entry := getCacheEntry(in_msg.LineAddress); + TBE tbe := TBEs.lookup(in_msg.LineAddress); + DPRINTF(RubySlicc, "%s\n", in_msg); + if (in_msg.Type == RubyRequestType:LD) { + trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe); + } else if (in_msg.Type == RubyRequestType:ATOMIC) { + trigger(Event:Atomic, in_msg.LineAddress, cache_entry, tbe); + } else if (in_msg.Type == RubyRequestType:ST) { + if(disableL1) { + trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe); + } else { + if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) { + if (in_msg.segment == HSASegment:SPILL) { + trigger(Event:StoreLocal, in_msg.LineAddress, cache_entry, tbe); + } else if (WB) { + trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe); + } else { + trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe); + } + } else { + Addr victim := L1cache.cacheProbe(in_msg.LineAddress); + trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } // end if (disableL1) + } else if (in_msg.Type == RubyRequestType:FLUSH) { + trigger(Event:Flush, in_msg.LineAddress, cache_entry, tbe); + } else if (in_msg.Type == RubyRequestType:REPLACEMENT){ + trigger(Event:Evict, in_msg.LineAddress, cache_entry, tbe); + } else { + // Unknown request type: error() is fatal, so no fallback event is triggered here. + error("Unexpected Request Message from VIC"); + } + } + } + } + + // Actions + + action(ic_invCache, "ic", 
desc="invalidate cache") { + if(is_valid(cache_entry)) { + cache_entry.writeMask.clear(); + L1cache.deallocate(address); + } + unset_cache_entry(); + } + + action(n_issueRdBlk, "n", desc="Issue RdBlk") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlk; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := curCycle(); + } + } + + action(rb_bypassDone, "rb", desc="bypass L1 of read access") { + peek(responseToTCP_in, ResponseMsg) { + DataBlock tmp:= in_msg.DataBlk; + if (use_seq_not_coal) { + sequencer.readCallback(address, tmp, false, MachineType:L1Cache); + } else { + coalescer.readCallback(address, MachineType:L1Cache, tmp); + } + if(is_valid(cache_entry)) { + unset_cache_entry(); + } + } + } + + action(wab_bypassDone, "wab", desc="bypass L1 of write access") { + peek(responseToTCP_in, ResponseMsg) { + DataBlock tmp := in_msg.DataBlk; + if (use_seq_not_coal) { + sequencer.writeCallback(address, tmp, false, MachineType:L1Cache); + } else { + coalescer.writeCallback(address, MachineType:L1Cache, tmp); + } + } + } + + action(norl_issueRdBlkOrloadDone, "norl", desc="local load done") { + peek(mandatoryQueue_in, RubyRequest){ + if (cache_entry.writeMask.cmpMask(in_msg.writeMask)) { + if (use_seq_not_coal) { + sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache); + } else { + coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk); + } + } else { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlk; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := 
MessageSizeType:Request_Control; + out_msg.InitialRequestTime := curCycle(); + } + } + } + } + + action(wt_writeThrough, "wt", desc="Flush dirty data") { + WTcnt := WTcnt + 1; + APPEND_TRANSITION_COMMENT("write++ = "); + APPEND_TRANSITION_COMMENT(WTcnt); + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + assert(is_valid(cache_entry)); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.writeMask.clear(); + out_msg.writeMask.orMask(cache_entry.writeMask); + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Data; + out_msg.Type := CoherenceRequestType:WriteThrough; + out_msg.InitialRequestTime := curCycle(); + out_msg.Shared := false; + } + } + + action(at_atomicThrough, "at", desc="send Atomic") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + out_msg.writeMask.clear(); + out_msg.writeMask.orMask(in_msg.writeMask); + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Data; + out_msg.Type := CoherenceRequestType:Atomic; + out_msg.InitialRequestTime := curCycle(); + out_msg.Shared := false; + } + } + } + + action(a_allocate, "a", desc="allocate block") { + if (is_invalid(cache_entry)) { + set_cache_entry(L1cache.allocate(address, new Entry)); + } + cache_entry.writeMask.clear(); + } + + action(t_allocateTBE, "t", desc="allocate TBE Entry") { + check_allocate(TBEs); + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + } + + action(d_deallocateTBE, "d", desc="Deallocate TBE") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(sf_setFlush, "sf", desc="set flush") { + inFlush := true; + APPEND_TRANSITION_COMMENT(" inFlush is true"); + } + + 
action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") { + mandatoryQueue_in.dequeue(clockEdge()); + } + + action(pr_popResponseQueue, "pr", desc="Pop Response Queue") { + responseToTCP_in.dequeue(clockEdge()); + } + + action(l_loadDone, "l", desc="local load done") { + assert(is_valid(cache_entry)); + if (use_seq_not_coal) { + sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache); + } else { + coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk); + } + } + + action(s_storeDone, "s", desc="local store done") { + assert(is_valid(cache_entry)); + + if (use_seq_not_coal) { + sequencer.writeCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache); + } else { + coalescer.writeCallback(address, MachineType:L1Cache, cache_entry.DataBlk); + } + cache_entry.Dirty := true; + } + + action(inv_invDone, "inv", desc="local inv done") { + if (use_seq_not_coal) { + DPRINTF(RubySlicc, "Sequencer does not define invCallback!\n"); + assert(false); + } else { + coalescer.invCallback(address); + } + } + + action(wb_wbDone, "wb", desc="local wb done") { + if (inFlush == true) { + Fcnt := Fcnt + 1; + if (Fcnt > WTcnt) { + if (use_seq_not_coal) { + DPRINTF(RubySlicc, "Sequencer does not define wbCallback!\n"); + assert(false); + } else { + coalescer.wbCallback(address); + } + Fcnt := Fcnt - 1; + } + if (WTcnt == 0 && Fcnt == 0) { + inFlush := false; + APPEND_TRANSITION_COMMENT(" inFlush is false"); + } + } + } + + action(wd_wtDone, "wd", desc="writethrough done") { + WTcnt := WTcnt - 1; + if (inFlush == true) { + Fcnt := Fcnt -1; + } + assert(WTcnt >= 0); + APPEND_TRANSITION_COMMENT("write-- = "); + APPEND_TRANSITION_COMMENT(WTcnt); + } + + action(dw_dirtyWrite, "dw", desc="update write mask"){ + peek(mandatoryQueue_in, RubyRequest) { + cache_entry.DataBlk.copyPartial(in_msg.WTData,in_msg.writeMask); + cache_entry.writeMask.orMask(in_msg.writeMask); + } + } + action(w_writeCache, "w", desc="write data to cache") { + 
peek(responseToTCP_in, ResponseMsg) { + assert(is_valid(cache_entry)); + DataBlock tmp := in_msg.DataBlk; + tmp.copyPartial(cache_entry.DataBlk,cache_entry.writeMask); + cache_entry.DataBlk := tmp; + } + } + + // Marks the block at this address as most recently used in L1cache. + action(mru_updateMRU, "mru", desc="Touch block for replacement policy") { + L1cache.setMRU(address); + } + +// action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") { +// mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); +// } + + action(z_stall, "z", desc="stall; built-in") { + // built-in action + } + + // Transitions + // ArrayRead/Write assumptions: + // All requests read Tag Array + // TBE allocation writes the TagArray to I + // TBE only checked on misses + // Stores will also write dirty bits in the tag + // WriteThroughs still need to use cache entry as staging buffer for wavefront + + // Stalling transitions do NOT check the tag array...and if they do, + // they can cause a resource stall deadlock! + + transition({A}, {Load, Store, Atomic, StoreThrough}) { //TagArrayRead} { + z_stall; + } + + transition({M, V, L}, Load) {TagArrayRead, DataArrayRead} { + l_loadDone; + mru_updateMRU; + p_popMandatoryQueue; + } + + transition(I, Load) {TagArrayRead} { + n_issueRdBlk; + p_popMandatoryQueue; + } + + transition({V, I}, Atomic, A) {TagArrayRead, TagArrayWrite} { + t_allocateTBE; + mru_updateMRU; + at_atomicThrough; + p_popMandatoryQueue; + } + + // NOTE(review): unlike the {V, I} case above, this transition does not pop the mandatory queue - confirm this is intended. + transition({M, W}, Atomic, A) {TagArrayRead, TagArrayWrite} { + wt_writeThrough; + t_allocateTBE; + at_atomicThrough; + ic_invCache; + } + + transition(W, Load, I) {TagArrayRead, DataArrayRead} { + wt_writeThrough; + norl_issueRdBlkOrloadDone; + p_popMandatoryQueue; + } + + transition({I}, StoreLocal, L) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + a_allocate; + dw_dirtyWrite; + s_storeDone; + p_popMandatoryQueue; + } + + transition({L, V}, StoreLocal, L) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + dw_dirtyWrite; + mru_updateMRU; + s_storeDone; + p_popMandatoryQueue; 
+ } + + transition(I, Store, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + a_allocate; + dw_dirtyWrite; + s_storeDone; + p_popMandatoryQueue; + } + + transition(V, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + dw_dirtyWrite; + mru_updateMRU; + s_storeDone; + p_popMandatoryQueue; + } + + transition({M, W}, Store) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + dw_dirtyWrite; + mru_updateMRU; + s_storeDone; + p_popMandatoryQueue; + } + + //M,W should not see storeThrough + transition(I, StoreThrough) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + a_allocate; + dw_dirtyWrite; + s_storeDone; + wt_writeThrough; + ic_invCache; + p_popMandatoryQueue; + } + + transition({V,L}, StoreThrough, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + dw_dirtyWrite; + s_storeDone; + wt_writeThrough; + ic_invCache; + p_popMandatoryQueue; + } + + transition(I, TCC_Ack, V) {TagArrayRead, TagArrayWrite, DataArrayRead, DataArrayWrite} { + a_allocate; + w_writeCache; + l_loadDone; + pr_popResponseQueue; + } + + transition(I, Bypass, I) { + rb_bypassDone; + pr_popResponseQueue; + } + + transition(A, Bypass, I){ + d_deallocateTBE; + wab_bypassDone; + pr_popResponseQueue; + } + + transition(A, TCC_Ack, I) {TagArrayRead, DataArrayRead, DataArrayWrite} { + d_deallocateTBE; + a_allocate; + w_writeCache; + s_storeDone; + pr_popResponseQueue; + ic_invCache; + } + + transition(V, TCC_Ack, V) {TagArrayRead, DataArrayRead, DataArrayWrite} { + w_writeCache; + l_loadDone; + pr_popResponseQueue; + } + + transition({W, M}, TCC_Ack, M) {TagArrayRead, TagArrayWrite, DataArrayRead, DataArrayWrite} { + w_writeCache; + l_loadDone; + pr_popResponseQueue; + } + + transition({I, V}, Repl, I) {TagArrayRead, TagArrayWrite} { + ic_invCache; + } + + transition({A}, Repl) {TagArrayRead, TagArrayWrite} { + ic_invCache; + } + + transition({W, M}, Repl, I) {TagArrayRead, TagArrayWrite, DataArrayRead} { + wt_writeThrough; + ic_invCache; + } + + transition(L, Repl, I) {TagArrayRead, 
TagArrayWrite, DataArrayRead} { + wt_writeThrough; + ic_invCache; + } + + transition({W, M}, Flush, I) {TagArrayRead, TagArrayWrite, DataArrayRead} { + sf_setFlush; + wt_writeThrough; + ic_invCache; + p_popMandatoryQueue; + } + + transition({V, I, A, L},Flush) {TagArrayFlash} { + sf_setFlush; + wb_wbDone; + p_popMandatoryQueue; + } + + transition({I, V}, Evict, I) {TagArrayFlash} { + inv_invDone; + p_popMandatoryQueue; + ic_invCache; + } + + transition({W, M}, Evict, W) {TagArrayFlash} { + inv_invDone; + p_popMandatoryQueue; + } + + transition({A, L}, Evict) {TagArrayFlash} { + inv_invDone; + p_popMandatoryQueue; + } + + // TCC_AckWB only snoops TBE + transition({V, I, A, M, W, L}, TCC_AckWB) { + wd_wtDone; + wb_wbDone; + pr_popResponseQueue; + } +} diff --git a/src/mem/protocol/GPU_VIPER.slicc b/src/mem/protocol/GPU_VIPER.slicc new file mode 100644 index 000000000..45f7f3477 --- /dev/null +++ b/src/mem/protocol/GPU_VIPER.slicc @@ -0,0 +1,9 @@ +protocol "GPU_VIPER"; +include "RubySlicc_interfaces.slicc"; +include "MOESI_AMD_Base-msg.sm"; +include "MOESI_AMD_Base-dir.sm"; +include "MOESI_AMD_Base-CorePair.sm"; +include "GPU_VIPER-TCP.sm"; +include "GPU_VIPER-SQC.sm"; +include "GPU_VIPER-TCC.sm"; +include "MOESI_AMD_Base-L3cache.sm"; diff --git a/src/mem/protocol/GPU_VIPER_Baseline.slicc b/src/mem/protocol/GPU_VIPER_Baseline.slicc new file mode 100644 index 000000000..49bdce38c --- /dev/null +++ b/src/mem/protocol/GPU_VIPER_Baseline.slicc @@ -0,0 +1,9 @@ +protocol "GPU_VIPER"; +include "RubySlicc_interfaces.slicc"; +include "MOESI_AMD_Base-msg.sm"; +include "MOESI_AMD_Base-probeFilter.sm"; +include "MOESI_AMD_Base-CorePair.sm"; +include "GPU_VIPER-TCP.sm"; +include "GPU_VIPER-SQC.sm"; +include "GPU_VIPER-TCC.sm"; +include "MOESI_AMD_Base-L3cache.sm"; diff --git a/src/mem/protocol/GPU_VIPER_Region-TCC.sm b/src/mem/protocol/GPU_VIPER_Region-TCC.sm new file mode 100644 index 000000000..c3aef15a3 --- /dev/null +++ b/src/mem/protocol/GPU_VIPER_Region-TCC.sm @@ -0,0 +1,773 
@@ +/* + * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Sooraj Puthoor, Blake Hechtman + */ + +/* + * This file is inherited from GPU_VIPER-TCC.sm and retains its structure. 
+ * There are very few modifications in this file from the original VIPER TCC
+ */
+
+machine(MachineType:TCC, "TCC Cache")
+ : CacheMemory * L2cache;
+   bool WB; /*is this cache Writeback?*/
+   int regionBufferNum;
+   Cycles l2_request_latency := 50;
+   Cycles l2_response_latency := 20;
+
+  // From the TCPs or SQCs
+  MessageBuffer * requestFromTCP, network="From", virtual_network="1", ordered="true", vnet_type="request";
+  // To the Cores. TCC deals only with TCPs/SQCs. CP cores do not communicate directly with TCC.
+  MessageBuffer * responseToCore, network="To", virtual_network="3", ordered="true", vnet_type="response";
+  // From the NB
+  MessageBuffer * probeFromNB, network="From", virtual_network="0", ordered="false", vnet_type="request";
+  MessageBuffer * responseFromNB, network="From", virtual_network="2", ordered="false", vnet_type="response";
+  // To the NB
+  MessageBuffer * requestToNB, network="To", virtual_network="0", ordered="false", vnet_type="request";
+  MessageBuffer * responseToNB, network="To", virtual_network="2", ordered="false", vnet_type="response";
+  MessageBuffer * unblockToNB, network="To", virtual_network="4", ordered="false", vnet_type="unblock";
+
+  // Internal queue used by the atomic bookkeeping (see triggerQueue_in).
+  MessageBuffer * triggerQueue, ordered="true", random="false";
+{
+  // EVENTS
+  enumeration(Event, desc="TCC Events") {
+    // Requests coming from the Cores
+    RdBlk, desc="RdBlk event";
+    WrVicBlk, desc="L1 Write Through";
+    WrVicBlkBack, desc="L1 Write Back(dirty cache)";
+    Atomic, desc="Atomic Op";
+    AtomicDone, desc="AtomicOps Complete";
+    AtomicNotDone, desc="AtomicOps not Complete";
+    Data, desc="data message";
+    // Coming from this TCC
+    L2_Repl, desc="L2 Replacement";
+    // Probes
+    PrbInv, desc="Invalidating probe";
+    // Coming from Memory Controller
+    WBAck, desc="writethrough ack from memory";
+  }
+
+  // STATES
+  state_declaration(State, desc="TCC State", default="TCC_State_I") {
+    M, AccessPermission:Read_Write, desc="Modified(dirty cache only)";
+    W, AccessPermission:Read_Write, desc="Written(dirty cache only)";
+    V, AccessPermission:Read_Only, desc="Valid";
+    I, AccessPermission:Invalid, desc="Invalid";
+    IV, AccessPermission:Busy, desc="Waiting for Data";
+    WI, AccessPermission:Busy, desc="Waiting on Writethrough Ack";
+    A, AccessPermission:Busy, desc="Invalid waiting on atomic Data";
+  }
+
+  // Array accesses a transition may declare; consumed by recordRequestType
+  // and checkResourceAvailable below.
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead, desc="Read the data array";
+    DataArrayWrite, desc="Write the data array";
+    TagArrayRead, desc="Read the tag array";
+    TagArrayWrite, desc="Write the tag array";
+  }
+
+
+  // STRUCTURES
+
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState, desc="cache state";
+    bool Dirty, desc="Is the data dirty (diff from memory?)";
+    DataBlock DataBlk, desc="Data for the block";
+    WriteMask writeMask, desc="Dirty byte mask";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState, desc="Transient state";
+    DataBlock DataBlk, desc="data for the block";
+    bool Dirty, desc="Is the data dirty?";
+    bool Shared, desc="Victim hit by shared probe";
+    MachineID From, desc="Waiting for writeback from...";
+    NetDest Destination, desc="Data destination";
+    int numAtomics, desc="number remaining atomics";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs";
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+
+
+  // FUNCTION DEFINITIONS
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  // Returns the RegionBuffer machine numbered regionBufferNum. The mach
+  // argument is not consulted.
+  MachineID getPeer(MachineID mach) {
+    return createMachineID(MachineType:RegionBuffer, intToID(regionBufferNum));
+  }
+
+  // Looks addr up in L2cache and returns the result as an Entry pointer.
+  Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", L2cache.lookup(addr));
+  }
+
+  // Returns (by reference) the data block of the cache entry for addr.
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    return getCacheEntry(addr).DataBlk;
+  }
+
+  // True when addr already has a tag in L2cache or L2cache reports room for it.
+  bool presentOrAvail(Addr addr) {
+    return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+  }
+
+  // A valid TBE takes priority over the cache entry; with neither, the
+  // block is reported as I.
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  // Stores the new state in whichever of the TBE and cache entry are valid.
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+        tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+        cache_entry.CacheState := state;
+    }
+  }
+
+  // Functional read: served from the TBE data block when a TBE exists for
+  // addr, otherwise delegated to functionalMemoryRead.
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  // Functional write: updates the TBE data block (if any) and always calls
+  // functionalMemoryWrite; returns the sum of the counts both report.
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes +
+        functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  // Permission derived from the TBE state if a TBE exists, else from the
+  // cache entry state, else NotPresent.
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return TCC_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return TCC_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  // Applies the permission that corresponds to state to a valid cache entry.
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(TCC_State_to_permission(state));
+    }
+  }
+
+  // Forwards the array access named by request_type to L2cache's stats hook.
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+        L2cache.recordRequestType(CacheRequestType:DataArrayRead,addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+        L2cache.recordRequestType(CacheRequestType:DataArrayWrite,addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+        L2cache.recordRequestType(CacheRequestType:TagArrayRead,addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+        L2cache.recordRequestType(CacheRequestType:TagArrayWrite,addr);
+    }
+  }
+
+  // Asks L2cache whether the data or tag array needed by request_type is
+  // available for addr; any other request type is an error.
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+
+  // ** OUT_PORTS **
+
+  // Three classes of ports
+  // Class 1: downward facing network links to NB
+  out_port(requestToNB_out, CPURequestMsg, requestToNB);
+  out_port(responseToNB_out, ResponseMsg, responseToNB);
+  out_port(unblockToNB_out, UnblockMsg, unblockToNB);
+
+  // Class 2: upward facing ports to GPU cores
+  out_port(responseToCore_out, ResponseMsg, responseToCore);
+
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+  //
+  // request queue going to NB
+  //
+
+
+// ** IN_PORTS **
+  // Trigger messages: raise AtomicDone once the TBE has no atomics left,
+  // AtomicNotDone otherwise.
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue) {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        // numAtomics lives in the TBE, so the lookup must have succeeded.
+        assert(is_valid(tbe));
+        if (tbe.numAtomics == 0) {
+            trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe);
+        } else {
+            trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+
+
+  // Responses from the NB. A data response needs a cache slot: if the block
+  // is neither present nor allocatable, a victim picked by cacheProbe is
+  // replaced first (L2_Repl is triggered on the victim address).
+  in_port(responseFromNB_in, ResponseMsg, responseFromNB) {
+    if (responseFromNB_in.isReady(clockEdge())) {
+      peek(responseFromNB_in, ResponseMsg, block_on="addr") {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+          if(presentOrAvail(in_msg.addr)) {
+            trigger(Event:Data, in_msg.addr, cache_entry, tbe);
+          } else {
+            Addr victim :=  L2cache.cacheProbe(in_msg.addr);
+            trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+          trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+
+  // Finally handling incoming requests (from TCP) and probes (from NB).
+
+  // Every probe from the NB is raised as PrbInv.
+  in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, NBProbeRequestMsg) {
+        DPRINTF(RubySlicc, "%s\n", in_msg);
+        DPRINTF(RubySlicc, "machineID: %s\n", machineID);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+      }
+    }
+  }
+
+
+  // Requests from the TCPs/SQCs. A WriteThrough becomes WrVicBlkBack when WB
+  // is set (replacing a victim first if no slot is available) and WrVicBlk
+  // otherwise.
+  in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) {
+    if (coreRequestNetwork_in.isReady(clockEdge())) {
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+            if(WB) {
+                if(presentOrAvail(in_msg.addr)) {
+                    trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe);
+                } else {
+                    Addr victim :=  L2cache.cacheProbe(in_msg.addr);
+                    trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+                }
+            } else {
+                trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
+            }
+        } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+          trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
+          trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+        } else {
+          DPRINTF(RubySlicc, "%s\n", in_msg);
+          error("Unexpected Request Message to TCC");
+        }
+      }
+    }
+  }
+  // BEGIN ACTIONS
+
+  // Frees the L2 entry for this address (if one exists) and clears the
+  // transition's cache_entry binding.
+  action(i_invL2, "i", desc="invalidate TCC cache block") {
+    if (is_valid(cache_entry)) {
+        L2cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  // Data available at TCC. Send the DATA to TCP
+  action(sd_sendData, "sd", desc="send Shared response") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := cache_entry.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.Dirty := false;
+        out_msg.State := CoherenceState:Shared;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+
+  // Data was not available at TCC. So, TCC forwarded the request to
+  // directory and directory responded back with data. Now, forward the
+  // DATA to TCP and send the unblock ack back to directory.
+  action(sdr_sendDataResponse, "sdr", desc="send Shared response") {
+    // Data response to every requestor recorded in the TBE.
+    enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+      out_msg.Type := CoherenceResponseType:TDSysResp;
+      out_msg.addr := address;
+      out_msg.Destination := tbe.Destination;
+      out_msg.Sender := machineID;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.State := CoherenceState:Shared;
+      out_msg.Dirty := false;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+    // Unblock message addressed to the directory that owns this address.
+    enqueue(unblockToNB_out, UnblockMsg, 1) {
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      out_msg.addr := address;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+
+  // Forward the core's request to the peer, but only when the TBE holds
+  // exactly one destination.
+  action(rd_requestData, "r", desc="Miss in L2, pass on") {
+    if (tbe.Destination.count() == 1) {
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+          out_msg.Type := in_msg.Type;
+          out_msg.addr := address;
+          out_msg.Destination.add(getPeer(machineID));
+          out_msg.Requestor := machineID;
+          out_msg.MessageSize := in_msg.MessageSize;
+          out_msg.Shared := false; // unneeded for this request
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+      }
+    }
+  }
+
+  // Relay a writeback ack from the NB to the WTRequestor named in it.
+  action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+    peek(responseFromNB_in, ResponseMsg) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+        out_msg.Type := CoherenceResponseType:TDSysWBAck;
+        out_msg.addr := address;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        out_msg.Destination.clear();
+        out_msg.Destination.add(in_msg.WTRequestor);
+      }
+    }
+  }
+
+  // Ack a write directly to the core at the head of the request queue.
+  action(swb_sendWBAck, "swb", desc="send WB Ack") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+        out_msg.Type := CoherenceResponseType:TDSysWBAck;
+        out_msg.addr := address;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        out_msg.Destination.clear();
+        out_msg.Destination.add(in_msg.Requestor);
+      }
+    }
+  }
+
+  // Pass the NB's atomic result (data and message size) on to the
+  // WTRequestor named in the response.
+  action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
+    peek(responseFromNB_in, ResponseMsg) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.addr := address;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.WTRequestor);
+        out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.MessageSize := in_msg.MessageSize;
+      }
+    }
+  }
+  // Done-ack to the peer; Dirty mirrors the TBE when one exists and is
+  // false otherwise.
+  action(sd2rb_sendDone2RegionBuffer, "sd2rb", desc="Request finished, send done ack") {
+    enqueue(unblockToNB_out, UnblockMsg, 1) {
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      out_msg.addr := address;
+      out_msg.DoneAck := true;
+      out_msg.Destination.add(getPeer(machineID));
+      if (is_invalid(tbe)) {
+          out_msg.Dirty := false;
+      } else {
+          out_msg.Dirty := tbe.Dirty;
+      }
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  // Allocate an L2 entry with an empty write mask unless one is already bound.
+  action(a_allocateBlock, "a", desc="allocate TCC block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L2cache.allocate(address, new Entry));
+      cache_entry.writeMask.clear();
+    }
+  }
+
+  // Create the TBE on first use, then (whenever the request queue has a
+  // message ready) record the requestor of a RdBlk or Atomic at its head.
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    if (is_invalid(tbe)) {
+      check_allocate(TBEs);
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      tbe.numAtomics := 0;
+      tbe.Destination.clear();
+    }
+    if (coreRequestNetwork_in.isReady(clockEdge())) {
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        if (in_msg.Type == CoherenceRequestType:Atomic || in_msg.Type == CoherenceRequestType:RdBlk) {
+          tbe.Destination.add(in_msg.Requestor);
+        }
+      }
+    }
+  }
+
+  // Drop the recorded destinations, release the TBE, and unbind it.
+  action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
+    tbe.Destination.clear();
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  // Overwrite the cached block with the data carried by the NB response.
+  action(wcb_writeCacheBlock, "wcb", desc="write data to TCC") {
+    peek(responseFromNB_in, ResponseMsg) {
+      cache_entry.DataBlk := in_msg.DataBlk;
+      DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+    }
+  }
+
+
+  // Merge the bytes selected by the request's write mask into the cached
+  // block and OR that mask into the entry's dirty-byte mask.
+  action(wdb_writeDirtyBytes, "wdb", desc="write data to TCC") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      cache_entry.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
+      cache_entry.writeMask.orMask(in_msg.writeMask);
+      DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+    }
+  }
+
+  // Forward the core's write to the peer as a WriteThrough. The original
+  // requestor travels in WTRequestor; Requestor is this TCC.
+  action(wt_writeThrough, "wt", desc="write through data") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+        out_msg.addr := address;
+        out_msg.Requestor := machineID;
+        out_msg.WTRequestor := in_msg.Requestor;
+        out_msg.Destination.add(getPeer(machineID));
+        out_msg.MessageSize := MessageSizeType:Data;
+        out_msg.Type := CoherenceRequestType:WriteThrough;
+        out_msg.Dirty := true;
+        out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.writeMask.orMask(in_msg.writeMask);
+      }
+    }
+  }
+
+  // Write the cached block and its dirty-byte mask back to the peer. It is
+  // sent as a WriteThrough with this TCC as both Requestor and WTRequestor.
+  action(wb_writeBack, "wb", desc="write back data") {
+    enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      out_msg.WTRequestor := machineID;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.MessageSize := MessageSizeType:Data;
+      out_msg.Type := CoherenceRequestType:WriteThrough;
+      out_msg.Dirty := true;
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.writeMask.orMask(cache_entry.writeMask);
+    }
+  }
+
+  // Forward the core's atomic to the peer as an Atomic request. Only the
+  // write mask is copied from the incoming message; DataBlk is not set here.
+  // NOTE(review): the desc string repeats wb_writeBack's "write back data".
+  action(at_atomicThrough, "at", desc="write back data") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+        out_msg.addr := address;
+        out_msg.Requestor := machineID;
+        out_msg.WTRequestor := in_msg.Requestor;
+        out_msg.Destination.add(getPeer(machineID));
+        out_msg.MessageSize := MessageSizeType:Data;
+        out_msg.Type := CoherenceRequestType:Atomic;
+        out_msg.Dirty := true;
+        out_msg.writeMask.orMask(in_msg.writeMask);
+      }
+    }
+  }
+
+  // Answer a probe with a data-less response to the directory
+  // (Dirty=false, Hit=false, Ntsl=true, State=NA).
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(responseToNB_out, ResponseMsg, 1) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+  action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+    L2cache.setMRU(address);
+  }
+
+  // Queue pops: each removes the head message of its in_port.
+  action(p_popRequestQueue, "p", desc="pop request queue") {
+    coreRequestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="pop response queue") {
+    responseFromNB_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+  // Recycle the head of the core request queue after recycle_latency
+  // instead of consuming it.
+  action(zz_recycleRequestQueue, "z", desc="stall"){
+    coreRequestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+
+  action(ina_incrementNumAtomics, "ina", desc="inc num atomics") {
+    tbe.numAtomics := tbe.numAtomics + 1;
+  }
+
+
+  // When the count reaches zero, post an AtomicDone trigger to this
+  // controller's own trigger queue.
+  action(dna_decrementNumAtomics, "dna", desc="dec num atomics") {
+    tbe.numAtomics := tbe.numAtomics - 1;
+    if (tbe.numAtomics==0) {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:AtomicDone;
+      }
+    }
+  }
+
+  action(ptr_popTriggerQueue, "ptr", desc="pop Trigger") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  // END ACTIONS
+
+  // BEGIN TRANSITIONS
+  // transitions from base
+  // Assumptions for ArrayRead/Write
+  // TBE checked before tags
+  // Data Read/Write requires Tag Read
+
+  // Core requests that hit a block in a transient state (WI, A, IV) are
+  // recycled rather than popped. A still accepts Atomic and IV still accepts
+  // RdBlk; those cases have their own transitions.
+  transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) {TagArrayRead} {
+    zz_recycleRequestQueue;
+  }
+  transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) {TagArrayRead} {
+    zz_recycleRequestQueue;
+  }
+  transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) {TagArrayRead} {
+    zz_recycleRequestQueue;
+  }
+  // Read hit: reply from the cache, mark the block MRU, consume the request.
+  transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} {
+    sd_sendData;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+
+  // Read of a W block: start a writeback and wait in WI. The request is not
+  // popped here; while in WI it is recycled by the WI stall transition.
+  transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} {
+    t_allocateTBE;
+    wb_writeBack;
+  }
+
+  // Read miss: allocate a TBE, forward the request, wait for data in IV.
+  transition(I, RdBlk, IV) {TagArrayRead} {
+    t_allocateTBE;
+    rd_requestData;
+    p_popRequestQueue;
+  }
+
+  // Further reads while data is outstanding. t_allocateTBE records the extra
+  // requestor; rd_requestData only sends when the TBE holds one destination.
+  transition(IV, RdBlk) {
+    t_allocateTBE;
+    rd_requestData;
+    p_popRequestQueue;
+  }
+
+  // Atomics are forwarded, not performed here: drop any local copy, forward
+  // the atomic, and count it as outstanding.
+  transition({V, I},Atomic, A) {TagArrayRead} {
+    i_invL2;
+    t_allocateTBE;
+    at_atomicThrough;
+    ina_incrementNumAtomics;
+    p_popRequestQueue;
+  }
+
+  transition(A, Atomic) {
+    at_atomicThrough;
+    ina_incrementNumAtomics;
+    p_popRequestQueue;
+  }
+
+  // Atomic to an M/W block: write it back first. The request stays queued
+  // and is recycled while in WI.
+  transition({M, W}, Atomic, WI) {TagArrayRead} {
+    t_allocateTBE;
+    wb_writeBack;
+  }
+
+  // Cacheblock stays in I state which implies
+  // this TCC is a write-no-allocate cache
+  transition(I, WrVicBlk) {TagArrayRead} {
+    wt_writeThrough;
+    p_popRequestQueue;
+  }
+
+  // Write-through hit: update the cached bytes and forward the write.
+  transition(V, WrVicBlk) {TagArrayRead, DataArrayWrite} {
+    ut_updateTag;
+    wdb_writeDirtyBytes;
+    wt_writeThrough;
+    p_popRequestQueue;
+  }
+
+  // WrVicBlkBack (raised only when WB is set): ack the core and keep the
+  // dirty bytes locally without forwarding them.
+  transition({V, M}, WrVicBlkBack, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    ut_updateTag;
+    swb_sendWBAck;
+    wdb_writeDirtyBytes;
+    p_popRequestQueue;
+  }
+
+  transition(W, WrVicBlkBack) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    ut_updateTag;
+    swb_sendWBAck;
+    wdb_writeDirtyBytes;
+    p_popRequestQueue;
+  }
+
+  transition(I, WrVicBlkBack, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocateBlock;
+    ut_updateTag;
+    swb_sendWBAck;
+    wdb_writeDirtyBytes;
+    p_popRequestQueue;
+  }
+
+  // Replacement of an M/W victim: write it back before freeing the entry.
+  // No queue is popped; the message that caused the replacement stays at
+  // the head of its queue.
+  transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} {
+    t_allocateTBE;
+    wb_writeBack;
+    i_invL2;
+  }
+
+  transition({I, V}, L2_Repl, I) {TagArrayRead, TagArrayWrite} {
+    i_invL2;
+  }
+
+  transition({A, IV, WI}, L2_Repl) {
+    i_invL2;
+  }
+
+  // Probes are always answered with a data-less response; the block is
+  // never supplied from here.
+  transition({I, V}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(M, PrbInv, W) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(W, PrbInv) {TagArrayRead} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition({A, IV, WI}, PrbInv) {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  // Fill: install the data, answer the requestors recorded in the TBE, send
+  // the done ack, and release the TBE last (sdr_sendDataResponse reads it).
+  transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocateBlock;
+    ut_updateTag;
+    wcb_writeCacheBlock;
+    sdr_sendDataResponse;
+    sd2rb_sendDone2RegionBuffer;
+    pr_popResponseQueue;
+    dt_deallocateTBE;
+  }
+
+  // One atomic response arrived: relay it and decrement the outstanding
+  // count. The state stays A until AtomicDone is triggered.
+  transition(A, Data) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocateBlock;
+    ar_sendAtomicResponse;
+    sd2rb_sendDone2RegionBuffer;
+    dna_decrementNumAtomics;
+    pr_popResponseQueue;
+  }
+
+  transition(A, AtomicDone, I) {TagArrayRead, TagArrayWrite} {
+    dt_deallocateTBE;
+    ptr_popTriggerQueue;
+  }
+
+  // More atomics were issued after the trigger was posted; just drop it.
+  transition(A, AtomicNotDone) {TagArrayRead} {
+    ptr_popTriggerQueue;
+  }
+
+  //M,W should not see WBAck as the cache is in WB mode
+  //WBAcks do not need to check tags
+  transition({I, V, IV, A}, WBAck) {
+    w_sendResponseWBAck;
+    sd2rb_sendDone2RegionBuffer;
+    pr_popResponseQueue;
+  }
+
+  // Ack for a writeback started by wb_writeBack: nothing is relayed to a
+  // core; the TBE is released and the block becomes I.
+  transition(WI, WBAck,I) {
+    sd2rb_sendDone2RegionBuffer;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+}
diff --git a/src/mem/protocol/GPU_VIPER_Region.slicc b/src/mem/protocol/GPU_VIPER_Region.slicc
new file mode 100644
index 000000000..cbfef9de3
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER_Region.slicc
@@ -0,0 +1,11 @@
+protocol "GPU_VIPER_Region";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-Region-CorePair.sm";
+include "MOESI_AMD_Base-L3cache.sm";
+include "MOESI_AMD_Base-Region-dir.sm";
+include "GPU_VIPER_Region-TCC.sm";
+include "GPU_VIPER-TCP.sm";
+include "GPU_VIPER-SQC.sm";
+include "MOESI_AMD_Base-RegionDir.sm";
+include "MOESI_AMD_Base-RegionBuffer.sm";
diff --git a/src/mem/protocol/MOESI_AMD_Base-CorePair.sm b/src/mem/protocol/MOESI_AMD_Base-CorePair.sm
new file mode 100644
index 000000000..76fe77230
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-CorePair.sm
@@ -0,0 +1,2904 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro
Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * Author: Lisa Hsu + */ + +machine(MachineType:CorePair, "CP-like Core Coherence") + : Sequencer * sequencer; + Sequencer * sequencer1; + CacheMemory * L1Icache; + CacheMemory * L1D0cache; + CacheMemory * L1D1cache; + CacheMemory * L2cache; // func mem logic looks in this CacheMemory + bool send_evictions := "False"; + Cycles issue_latency := 5; // time to send data down to NB + Cycles l2_hit_latency := 18; + + // BEGIN Core Buffers + + // To the Network + MessageBuffer * requestFromCore, network="To", virtual_network="0", vnet_type="request"; + MessageBuffer * responseFromCore, network="To", virtual_network="2", vnet_type="response"; + MessageBuffer * unblockFromCore, network="To", virtual_network="4", vnet_type="unblock"; + + // From the Network + MessageBuffer * probeToCore, network="From", virtual_network="0", vnet_type="request"; + MessageBuffer * responseToCore, network="From", virtual_network="2", vnet_type="response"; + + MessageBuffer * mandatoryQueue; + + MessageBuffer * triggerQueue, ordered="true"; + + // END Core Buffers + +{ + // BEGIN STATES + state_declaration(State, desc="Cache states", default="CorePair_State_I") { + + // Base States + I, AccessPermission:Invalid, desc="Invalid"; + S, AccessPermission:Read_Only, desc="Shared"; + E0, AccessPermission:Read_Write, desc="Exclusive with Cluster 0 ownership"; + E1, AccessPermission:Read_Write, desc="Exclusive with Cluster 1 ownership"; + Es, AccessPermission:Read_Write, desc="Exclusive in core"; + O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line"; + Ms, AccessPermission:Read_Write, desc="Modified in core, both clusters may be sharing line"; + M0, AccessPermission:Read_Write, desc="Modified with cluster ownership"; + M1, AccessPermission:Read_Write, desc="Modified with cluster ownership"; + + // Transient States + I_M0, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet"; + I_M1, AccessPermission:Busy, 
desc="Invalid, issued RdBlkM, have not seen response yet"; + I_M0M1, AccessPermission:Busy, desc="Was in I_M0, got a store request from other cluster as well"; + I_M1M0, AccessPermission:Busy, desc="Was in I_M1, got a store request from other cluster as well"; + I_M0Ms, AccessPermission:Busy, desc="Was in I_M0, got a load request from other cluster as well"; + I_M1Ms, AccessPermission:Busy, desc="Was in I_M1, got a load request from other cluster as well"; + I_E0S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet"; + I_E1S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet"; + I_ES, AccessPermission:Busy, desc="S_F got hit by invalidating probe, RdBlk response needs to go to both clusters"; + + IF_E0S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E0S but expecting a L2_to_L1D0 trigger, just drop when receive"; + IF_E1S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E1S but expecting a L2_to_L1D1 trigger, just drop when receive"; + IF_ES, AccessPermission:Busy, desc="same, but waiting for two fills"; + IF0_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one"; + IF1_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one"; + F_S0, AccessPermission:Busy, desc="same, but going to S0 when trigger received"; + F_S1, AccessPermission:Busy, desc="same, but going to S1 when trigger received"; + + ES_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for clean writeback ack"; + MO_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for dirty writeback ack"; + MO_S0, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS"; + MO_S1, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS"; + S_F0, AccessPermission:Read_Only, desc="Shared, filling L1"; + S_F1, AccessPermission:Read_Only, desc="Shared, filling L1"; 
+ S_F, AccessPermission:Read_Only, desc="Shared, filling L1"; + O_F0, AccessPermission:Read_Only, desc="Owned, filling L1"; + O_F1, AccessPermission:Read_Only, desc="Owned, filling L1"; + O_F, AccessPermission:Read_Only, desc="Owned, filling L1"; + Si_F0, AccessPermission:Read_Only, desc="Shared, filling icache"; + Si_F1, AccessPermission:Read_Only, desc="Shared, filling icache"; + S_M0, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet"; + S_M1, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet"; + O_M0, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet"; + O_M1, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet"; + S0, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 0, waiting for response"; + S1, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 1, waiting for response"; + + Es_F0, AccessPermission:Read_Write, desc="Es, Cluster read, filling"; + Es_F1, AccessPermission:Read_Write, desc="Es, Cluster read, filling"; + Es_F, AccessPermission:Read_Write, desc="Es, other cluster read, filling"; + E0_F, AccessPermission:Read_Write, desc="E0, cluster read, filling"; + E1_F, AccessPermission:Read_Write, desc="..."; + E0_Es, AccessPermission:Read_Write, desc="..."; + E1_Es, AccessPermission:Read_Write, desc="..."; + Ms_F0, AccessPermission:Read_Write, desc="..."; + Ms_F1, AccessPermission:Read_Write, desc="..."; + Ms_F, AccessPermission:Read_Write, desc="..."; + M0_F, AccessPermission:Read_Write, desc="..."; + M0_Ms, AccessPermission:Read_Write, desc="..."; + M1_F, AccessPermission:Read_Write, desc="..."; + M1_Ms, AccessPermission:Read_Write, desc="..."; + + I_C, AccessPermission:Invalid, desc="Invalid, but waiting for WBAck from NB from canceled writeback"; + S0_C, AccessPermission:Busy, desc="MO_S0 hit by invalidating probe, waiting for WBAck form NB for canceled WB"; + S1_C, AccessPermission:Busy, desc="MO_S1 hit by 
invalidating probe, waiting for WBAck form NB for canceled WB";
+    S_C, AccessPermission:Busy, desc="S*_C got NB_AckS, still waiting for WBAck";
+
+  } // END STATES
+
+  // BEGIN EVENTS
+  // Events handed to trigger() by the in_ports of this controller.
+  enumeration(Event, desc="CP Events") {
+    // CP Initiated events
+    C0_Load_L1miss, desc="Cluster 0 load, L1 missed";
+    C0_Load_L1hit, desc="Cluster 0 load, L1 hit";
+    C1_Load_L1miss, desc="Cluster 1 load L1 missed";
+    C1_Load_L1hit, desc="Cluster 1 load L1 hit";
+    Ifetch0_L1hit, desc="Instruction fetch, hit in the L1";
+    Ifetch1_L1hit, desc="Instruction fetch, hit in the L1";
+    Ifetch0_L1miss, desc="Instruction fetch, missed in the L1";
+    Ifetch1_L1miss, desc="Instruction fetch, missed in the L1";
+    C0_Store_L1miss, desc="Cluster 0 store missed in L1";
+    C0_Store_L1hit, desc="Cluster 0 store hit in L1";
+    C1_Store_L1miss, desc="Cluster 1 store missed in L1";
+    C1_Store_L1hit, desc="Cluster 1 store hit in L1";
+    // NB Initiated events
+    NB_AckS, desc="NB Ack to Core Request";
+    NB_AckM, desc="NB Ack to Core Request";
+    NB_AckE, desc="NB Ack to Core Request";
+
+    NB_AckWB, desc="NB Ack for writeback";
+
+    // Memory System initiated events
+    L1I_Repl, desc="Replace address from L1I"; // Presumed clean
+    L1D0_Repl, desc="Replace address from L1D0"; // Presumed clean
+    L1D1_Repl, desc="Replace address from L1D1"; // Presumed clean
+    L2_Repl, desc="Replace address from L2";
+
+    L2_to_L1D0, desc="L1 fill from L2";
+    L2_to_L1D1, desc="L1 fill from L2";
+    L2_to_L1I, desc="L1 fill from L2";
+
+    // Probe Events
+    PrbInvData, desc="probe, return O or M data";
+    PrbInv, desc="probe, no need for data";
+    PrbShrData, desc="probe downgrade, return O or M data";
+
+  } // END EVENTS
+
+  // One tag per (cache, array, direction) combination. recordRequestType()
+  // and checkResourceAvailable() map each tag onto the matching cache
+  // (L1D0 / L1D1 / L1I / L2) and onto its data or tag array.
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    L1D0DataArrayRead, desc="Read the data array";
+    L1D0DataArrayWrite, desc="Write the data array";
+    L1D0TagArrayRead, desc="Read the tag array";
+    L1D0TagArrayWrite, desc="Write the tag array";
+    L1D1DataArrayRead, desc="Read the data array";
+    L1D1DataArrayWrite, desc="Write the data array";
+    L1D1TagArrayRead, desc="Read the tag array";
+    L1D1TagArrayWrite, desc="Write the tag array";
+    L1IDataArrayRead, desc="Read the data array";
+    L1IDataArrayWrite, desc="Write the data array";
+    L1ITagArrayRead, desc="Read the tag array";
+    L1ITagArrayWrite, desc="Write the tag array";
+    L2DataArrayRead, desc="Read the data array";
+    L2DataArrayWrite, desc="Write the data array";
+    L2TagArrayRead, desc="Read the tag array";
+    L2TagArrayWrite, desc="Write the tag array";
+  }
+
+
+  // BEGIN STRUCTURE DEFINITIONS
+
+
+  // Cache Entry
+  // The same Entry type is allocated in the L1I, L1D0, L1D1 and L2 caches.
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState, desc="cache state";
+    bool Dirty, desc="Is the data dirty (diff than memory)?";
+    DataBlock DataBlk, desc="data for the block";
+    bool FromL2, default="false", desc="block just moved from L2";
+  }
+
+  // Transaction buffer entry. When a TBE exists for an address, getState()
+  // and getDataBlock() read the state and data from it instead of from the
+  // cache entry.
+  structure(TBE, desc="...") {
+    State TBEState, desc="Transient state";
+    DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
+    bool Dirty, desc="Is the data dirty (different than memory)?";
+    int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
+    bool Shared, desc="Victim hit by shared probe";
+  }
+
+  // Externally implemented table of TBEs keyed by address.
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<CorePair_TBE>", constructor="m_number_of_TBEs";
+
+  // Prototypes only; no bodies are given for these in this file section.
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  // END STRUCTURE DEFINITIONS
+
+  // BEGIN INTERNAL FUNCTIONS
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  // True when any of the L2, L1I, L1D0 or L1D1 caches holds a tag for addr.
+  bool addressInCore(Addr addr) {
+    return (L2cache.isTagPresent(addr) || L1Icache.isTagPresent(addr) || L1D0cache.isTagPresent(addr) || L1D1cache.isTagPresent(addr));
+  }
+
+  Entry
getCacheEntry(Addr address), return_by_pointer="yes" {
+    Entry L2cache_entry := static_cast(Entry, "pointer", L2cache.lookup(address));
+    return L2cache_entry;
+  }
+
+  // Data block for addr: the TBE's copy when a TBE exists for the address,
+  // otherwise the copy held by the L2 entry.
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE pending := TBEs.lookup(addr);
+    if (is_valid(pending)) {
+      return pending.DataBlk;
+    }
+    return getCacheEntry(addr).DataBlk;
+  }
+
+  // L1 data-cache entry for addr; cluster 0 selects L1D0, any other value
+  // selects L1D1.
+  Entry getL1CacheEntry(Addr addr, int cluster), return_by_pointer="yes" {
+    if (cluster == 0) {
+      return static_cast(Entry, "pointer", L1D0cache.lookup(addr));
+    }
+    return static_cast(Entry, "pointer", L1D1cache.lookup(addr));
+  }
+
+  // L1 instruction-cache entry for addr.
+  Entry getICacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", L1Icache.lookup(addr));
+  }
+
+  // The four presentOrAvail* helpers answer the same question for one cache
+  // each: is the tag already there, or does cacheAvail() report room for it?
+  bool presentOrAvail2(Addr addr) {
+    return (L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr));
+  }
+
+  bool presentOrAvailI(Addr addr) {
+    return (L1Icache.isTagPresent(addr) || L1Icache.cacheAvail(addr));
+  }
+
+  bool presentOrAvailD0(Addr addr) {
+    return (L1D0cache.isTagPresent(addr) || L1D0cache.cacheAvail(addr));
+  }
+
+  bool presentOrAvailD1(Addr addr) {
+    return (L1D1cache.isTagPresent(addr) || L1D1cache.cacheAvail(addr));
+  }
+
+  // State lookup order: TBE first, then the cache entry, else State:I.
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    }
+    if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  // Stores the new state in whichever of the TBE and the cache entry are
+  // valid (both, when both exist).
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+      cache_entry.CacheState := state;
+    }
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return CorePair_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return CorePair_State_to_permission(cache_entry.CacheState);
+    }
+
+    return
AccessPermission:NotPresent; + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(CorePair_State_to_permission(state)); + } + } + + MachineType testAndClearLocalHit(Entry cache_entry) { + assert(is_valid(cache_entry)); + if (cache_entry.FromL2) { + cache_entry.FromL2 := false; + return MachineType:L2Cache; + } else { + return MachineType:L1Cache; + } + } + + void recordRequestType(RequestType request_type, Addr addr) { + if (request_type == RequestType:L1D0DataArrayRead) { + L1D0cache.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:L1D0DataArrayWrite) { + L1D0cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:L1D0TagArrayRead) { + L1D0cache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:L1D0TagArrayWrite) { + L1D0cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } else if (request_type == RequestType:L1D1DataArrayRead) { + L1D1cache.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:L1D1DataArrayWrite) { + L1D1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:L1D1TagArrayRead) { + L1D1cache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == 
RequestType:L1D1TagArrayWrite) { + L1D1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } else if (request_type == RequestType:L1IDataArrayRead) { + L1Icache.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:L1IDataArrayWrite) { + L1Icache.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:L1ITagArrayRead) { + L1Icache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:L1ITagArrayWrite) { + L1Icache.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } else if (request_type == RequestType:L2DataArrayRead) { + L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:L2DataArrayWrite) { + L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:L2TagArrayRead) { + L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:L2TagArrayWrite) { + L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:L2DataArrayRead) { + return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L2DataArrayWrite) { + return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L2TagArrayRead) { + return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:L2TagArrayWrite) { + return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:L1D0DataArrayRead) { + return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L1D0DataArrayWrite) { + return 
L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L1D0TagArrayRead) { + return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:L1D0TagArrayWrite) { + return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:L1D1DataArrayRead) { + return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L1D1DataArrayWrite) { + return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L1D1TagArrayRead) { + return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:L1D1TagArrayWrite) { + return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:L1IDataArrayRead) { + return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L1IDataArrayWrite) { + return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L1ITagArrayRead) { + return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:L1ITagArrayWrite) { + return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr); + + } else { + return true; + } + } + + // END INTERNAL FUNCTIONS + + // ** OUT_PORTS ** + + out_port(requestNetwork_out, CPURequestMsg, requestFromCore); + out_port(responseNetwork_out, ResponseMsg, responseFromCore); + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + out_port(unblockNetwork_out, UnblockMsg, unblockFromCore); + + // ** IN_PORTS ** + + in_port(triggerQueue_in, TriggerMsg, triggerQueue, block_on="addr") { + if (triggerQueue_in.isReady(clockEdge())) { + peek(triggerQueue_in, TriggerMsg) { + Entry cache_entry := 
getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == TriggerType:L2_to_L1) {
+          if (in_msg.Dest == CacheId:L1I) {
+            trigger(Event:L2_to_L1I, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Dest == CacheId:L1D0) {
+            trigger(Event:L2_to_L1D0, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Dest == CacheId:L1D1) {
+            trigger(Event:L2_to_L1D1, in_msg.addr, cache_entry, tbe);
+          } else {
+            error("unexpected trigger dest");
+          }
+        }
+      }
+    }
+  }
+
+
+  // Probe network: maps an incoming probe onto a probe event.
+  //   PrbInv with ReturnData     -> PrbInvData
+  //   PrbInv without ReturnData  -> PrbInv
+  //   PrbDowngrade               -> PrbShrData (ReturnData is asserted)
+  // The L2 entry and the TBE for the probed address are passed to trigger().
+  in_port(probeNetwork_in, NBProbeRequestMsg, probeToCore) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, NBProbeRequestMsg, block_on="addr") {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == ProbeRequestType:PrbInv) {
+          if (in_msg.ReturnData) {
+            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+          assert(in_msg.ReturnData);
+          trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+        } else {
+          // Without this branch an unrecognised probe triggers no event, so
+          // nothing ever pops it from the queue; report it instead.
+          error("Unexpected probe type");
+        }
+      }
+    }
+  }
+
+
+  // ResponseNetwork
+  // Maps a response onto an NB ack event.
+  //   NBSysResp, State Modified/Shared/Exclusive -> NB_AckM / NB_AckS / NB_AckE
+  //   NBSysWBAck                                 -> NB_AckWB
+  // Any other type or state is reported through error().
+  in_port(responseToCore_in, ResponseMsg, responseToCore) {
+    if (responseToCore_in.isReady(clockEdge())) {
+      peek(responseToCore_in, ResponseMsg, block_on="addr") {
+
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+          if (in_msg.State == CoherenceState:Modified) {
+            trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.State == CoherenceState:Shared) {
+            trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.State == CoherenceState:Exclusive) {
+            trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe);
+          } else {
+            // Same reasoning as for probes: an unhandled state would leave
+            // the response at the head of the queue with no event fired.
+            error("Unexpected NBSysResp coherence state");
+          }
+        } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+          trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+
+
// Nothing from the Unblock Network + + // Mandatory Queue + in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") { + if (mandatoryQueue_in.isReady(clockEdge())) { + peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { + + Entry cache_entry := getCacheEntry(in_msg.LineAddress); + TBE tbe := TBEs.lookup(in_msg.LineAddress); + + if (in_msg.Type == RubyRequestType:IFETCH) { + // FETCH ACCESS + + if (L1Icache.isTagPresent(in_msg.LineAddress)) { + if (mod(in_msg.contextId, 2) == 0) { + trigger(Event:Ifetch0_L1hit, in_msg.LineAddress, cache_entry, tbe); + } else { + trigger(Event:Ifetch1_L1hit, in_msg.LineAddress, cache_entry, tbe); + } + } else { + if (presentOrAvail2(in_msg.LineAddress)) { + if (presentOrAvailI(in_msg.LineAddress)) { + if (mod(in_msg.contextId, 2) == 0) { + trigger(Event:Ifetch0_L1miss, in_msg.LineAddress, cache_entry, + tbe); + } else { + trigger(Event:Ifetch1_L1miss, in_msg.LineAddress, cache_entry, + tbe); + } + } else { + Addr victim := L1Icache.cacheProbe(in_msg.LineAddress); + trigger(Event:L1I_Repl, victim, + getCacheEntry(victim), TBEs.lookup(victim)); + } + } else { // Not present or avail in L2 + Addr victim := L2cache.cacheProbe(in_msg.LineAddress); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), + TBEs.lookup(victim)); + } + } + } else { + // DATA ACCESS + if (mod(in_msg.contextId, 2) == 1) { + if (L1D1cache.isTagPresent(in_msg.LineAddress)) { + if (in_msg.Type == RubyRequestType:LD) { + trigger(Event:C1_Load_L1hit, in_msg.LineAddress, cache_entry, + tbe); + } else { + // Stores must write through, make sure L2 avail. 
+ if (presentOrAvail2(in_msg.LineAddress)) { + trigger(Event:C1_Store_L1hit, in_msg.LineAddress, cache_entry, + tbe); + } else { + Addr victim := L2cache.cacheProbe(in_msg.LineAddress); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), + TBEs.lookup(victim)); + } + } + } else { + if (presentOrAvail2(in_msg.LineAddress)) { + if (presentOrAvailD1(in_msg.LineAddress)) { + if (in_msg.Type == RubyRequestType:LD) { + trigger(Event:C1_Load_L1miss, in_msg.LineAddress, + cache_entry, tbe); + } else { + trigger(Event:C1_Store_L1miss, in_msg.LineAddress, + cache_entry, tbe); + } + } else { + Addr victim := L1D1cache.cacheProbe(in_msg.LineAddress); + trigger(Event:L1D1_Repl, victim, + getCacheEntry(victim), TBEs.lookup(victim)); + } + } else { // not present or avail in L2 + Addr victim := L2cache.cacheProbe(in_msg.LineAddress); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } + } else { + Entry L1D0cache_entry := getL1CacheEntry(in_msg.LineAddress, 0); + if (is_valid(L1D0cache_entry)) { + if (in_msg.Type == RubyRequestType:LD) { + trigger(Event:C0_Load_L1hit, in_msg.LineAddress, cache_entry, + tbe); + } else { + if (presentOrAvail2(in_msg.LineAddress)) { + trigger(Event:C0_Store_L1hit, in_msg.LineAddress, cache_entry, + tbe); + } else { + Addr victim := L2cache.cacheProbe(in_msg.LineAddress); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), + TBEs.lookup(victim)); + } + } + } else { + if (presentOrAvail2(in_msg.LineAddress)) { + if (presentOrAvailD0(in_msg.LineAddress)) { + if (in_msg.Type == RubyRequestType:LD) { + trigger(Event:C0_Load_L1miss, in_msg.LineAddress, + cache_entry, tbe); + } else { + trigger(Event:C0_Store_L1miss, in_msg.LineAddress, + cache_entry, tbe); + } + } else { + Addr victim := L1D0cache.cacheProbe(in_msg.LineAddress); + trigger(Event:L1D0_Repl, victim, getCacheEntry(victim), + TBEs.lookup(victim)); + } + } else { + Addr victim := L2cache.cacheProbe(in_msg.LineAddress); + trigger(Event:L2_Repl, victim, 
getCacheEntry(victim), + TBEs.lookup(victim)); + } + } + } + } + } + } + } + + + // ACTIONS + action(ii_invIcache, "ii", desc="invalidate iCache") { + if (L1Icache.isTagPresent(address)) { + L1Icache.deallocate(address); + } + } + + action(i0_invCluster, "i0", desc="invalidate cluster 0") { + if (L1D0cache.isTagPresent(address)) { + L1D0cache.deallocate(address); + } + } + + action(i1_invCluster, "i1", desc="invalidate cluster 1") { + if (L1D1cache.isTagPresent(address)) { + L1D1cache.deallocate(address); + } + } + + action(ib_invBothClusters, "ib", desc="invalidate both clusters") { + if (L1D0cache.isTagPresent(address)) { + L1D0cache.deallocate(address); + } + if (L1D1cache.isTagPresent(address)) { + L1D1cache.deallocate(address); + } + } + + action(i2_invL2, "i2", desc="invalidate L2") { + if(is_valid(cache_entry)) { + L2cache.deallocate(address); + } + unset_cache_entry(); + } + + action(mru_setMRU, "mru", desc="Update LRU state") { + L2cache.setMRU(address); + } + + action(mruD1_setD1cacheMRU, "mruD1", desc="Update LRU state") { + L1D1cache.setMRU(address); + } + + action(mruD0_setD0cacheMRU, "mruD0", desc="Update LRU state") { + L1D0cache.setMRU(address); + } + + action(mruI_setIcacheMRU, "mruI", desc="Update LRU state") { + L1Icache.setMRU(address); + } + + action(n_issueRdBlk, "n", desc="Issue RdBlk") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlk; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + DPRINTF(RubySlicc,"%s\n",out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := curCycle(); + } + } + + action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlkM; + out_msg.Requestor := machineID; + 
out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  // Sends a RdBlkS request for the current address to the directory that
+  // map_Address_to_Directory() selects, stamped with the current cycle.
+  action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkS;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  // Sends a VicDirty request carrying the L2 entry's data. The entry must be
+  // valid and dirty; Shared is set only when the entry is in State:O.
+  action(vd_victim, "vd", desc="Victimize M/O L2 Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      assert(is_valid(cache_entry));
+      out_msg.DataBlk := cache_entry.DataBlk;
+      assert(cache_entry.Dirty);
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicDirty;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:O) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+    }
+  }
+
+  // Sends a VicClean request (no data block is attached). Shared is set only
+  // when the entry is in State:S.
+  action(vc_victim, "vc", desc="Victimize E/S L2 Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      // cache_entry.CacheState is read below; check the entry first, as
+      // vd_victim does, so a missing entry fails on an assertion.
+      assert(is_valid(cache_entry));
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicClean;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:S) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+    }
+  }
+
+  // Allocates an L1D0 entry for the address unless its tag is already there.
+  action(a0_allocateL1D, "a0", desc="Allocate L1D0 Block") {
+    if (L1D0cache.isTagPresent(address) == false) {
+      L1D0cache.allocateVoid(address, new Entry);
+    }
+  }
+
+  action(a1_allocateL1D, "a1", desc="Allocate L1D1 Block") {
+    if (L1D1cache.isTagPresent(address) == false) {
+
L1D1cache.allocateVoid(address, new Entry);
+    }
+  }
+
+  // Allocates an L1I entry for the address unless its tag is already there.
+  action(ai_allocateL1I, "ai", desc="Allocate L1I Block") {
+    if (L1Icache.isTagPresent(address) == false) {
+      L1Icache.allocateVoid(address, new Entry);
+    }
+  }
+
+  // Allocates an L2 entry and installs it as the transition's cache_entry,
+  // but only when no valid cache_entry is set yet.
+  action(a2_allocateL2, "a2", desc="Allocate L2 Block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L2cache.allocate(address, new Entry));
+    }
+  }
+
+  // Allocates a TBE for the address and seeds it from the (required) L2
+  // entry: data and dirty bit are copied, Shared starts out false.
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    assert(is_valid(cache_entry));
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+    tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs
+    tbe.Dirty := cache_entry.Dirty;
+    tbe.Shared := false;
+  }
+
+  // Releases the TBE for the address and clears the transition's tbe.
+  action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  // The four pop actions below each dequeue the head message of one in_port.
+  action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+    mandatoryQueue_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+    responseToCore_in.dequeue(clockEdge());
+  }
+
+  action(pt_popTriggerQueue, "pt", desc="Pop Trigger Queue") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  // Completes an instruction fetch through `sequencer`. The L1I entry must
+  // be valid; the data handed back comes from the L2 entry, and
+  // testAndClearLocalHit() reports (and clears) whether the L1I entry was
+  // just filled from the L2.
+  action(il0_loadDone, "il0", desc="Cluster 0 i load done") {
+    Entry entry := getICacheEntry(address);
+    Entry l2entry := getCacheEntry(address); // Used for functional accesses
+    assert(is_valid(entry));
+    // L2 supplies data (functional accesses only look in L2, ok because L1
+    // writes through to L2)
+    sequencer.readCallback(address,
+                           l2entry.DataBlk,
+                           true,
+                           testAndClearLocalHit(entry));
+  }
+
+  // Same as il0_loadDone, but completes the fetch through `sequencer1`.
+  action(il1_loadDone, "il1", desc="Cluster 1 i load done") {
+    Entry entry := getICacheEntry(address);
+    Entry l2entry := getCacheEntry(address); // Used for functional accesses
+    assert(is_valid(entry));
+    // L2 supplies data (functional accesses only look in L2, ok because L1
+    // writes through to L2)
+    sequencer1.readCallback(address,
+                            l2entry.DataBlk,
+                            true,
+
testAndClearLocalHit(entry)); + } + + action(l0_loadDone, "l0", desc="Cluster 0 load done") { + Entry entry := getL1CacheEntry(address, 0); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + assert(is_valid(entry)); + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + sequencer.readCallback(address, + l2entry.DataBlk, + true, + testAndClearLocalHit(entry)); + } + + action(l1_loadDone, "l1", desc="Cluster 1 load done") { + Entry entry := getL1CacheEntry(address, 1); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + assert(is_valid(entry)); + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + sequencer1.readCallback(address, + l2entry.DataBlk, + true, + testAndClearLocalHit(entry)); + } + + action(xl0_loadDone, "xl0", desc="Cluster 0 load done") { + peek(responseToCore_in, ResponseMsg) { + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + DPRINTF(ProtocolTrace, "CP Load Done 0 -- address %s, data: %s\n", address, l2entry.DataBlk); + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + sequencer.readCallback(address, + l2entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + } + } + + action(xl1_loadDone, "xl1", desc="Cluster 1 load done") { + peek(responseToCore_in, ResponseMsg) { + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + 
sequencer1.readCallback(address, + l2entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + } + } + + action(xi0_loadDone, "xi0", desc="Cluster 0 i-load done") { + peek(responseToCore_in, ResponseMsg) { + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + sequencer.readCallback(address, + l2entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + } + } + + action(xi1_loadDone, "xi1", desc="Cluster 1 i-load done") { + peek(responseToCore_in, ResponseMsg) { + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + sequencer1.readCallback(address, + l2entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + } + } + + action(s0_storeDone, "s0", desc="Cluster 0 store done") { + Entry entry := getL1CacheEntry(address, 0); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + sequencer.writeCallback(address, + cache_entry.DataBlk, + true, + testAndClearLocalHit(entry)); + cache_entry.Dirty := true; + entry.DataBlk := cache_entry.DataBlk; + entry.Dirty := true; + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + } + + action(s1_storeDone, "s1", desc="Cluster 1 store done") { + Entry entry := getL1CacheEntry(address, 1); + 
assert(is_valid(entry)); + assert(is_valid(cache_entry)); + sequencer1.writeCallback(address, + cache_entry.DataBlk, + true, + testAndClearLocalHit(entry)); + cache_entry.Dirty := true; + entry.Dirty := true; + entry.DataBlk := cache_entry.DataBlk; + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + } + + action(xs0_storeDone, "xs0", desc="Cluster 0 store done") { + peek(responseToCore_in, ResponseMsg) { + Entry entry := getL1CacheEntry(address, 0); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + sequencer.writeCallback(address, + cache_entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + cache_entry.Dirty := true; + entry.Dirty := true; + entry.DataBlk := cache_entry.DataBlk; + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + } + } + + action(xs1_storeDone, "xs1", desc="Cluster 1 store done") { + peek(responseToCore_in, ResponseMsg) { + Entry entry := getL1CacheEntry(address, 1); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + sequencer1.writeCallback(address, + cache_entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + cache_entry.Dirty := true; + entry.Dirty := true; + entry.DataBlk := cache_entry.DataBlk; + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + } + } + + action(forward_eviction_to_cpu0, "fec0", desc="sends eviction information to processor0") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); + sequencer.evictionCallback(address); + } + } + + action(forward_eviction_to_cpu1, 
"fec1", desc="sends eviction information to processor1") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); + sequencer1.evictionCallback(address); + } + } + + action(ci_copyL2ToL1, "ci", desc="copy L2 data to L1") { + Entry entry := getICacheEntry(address); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + entry.Dirty := cache_entry.Dirty; + entry.DataBlk := cache_entry.DataBlk; + entry.FromL2 := true; + } + + action(c0_copyL2ToL1, "c0", desc="copy L2 data to L1") { + Entry entry := getL1CacheEntry(address, 0); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + entry.Dirty := cache_entry.Dirty; + entry.DataBlk := cache_entry.DataBlk; + entry.FromL2 := true; + } + + action(c1_copyL2ToL1, "c1", desc="copy L2 data to L1") { + Entry entry := getL1CacheEntry(address, 1); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + entry.Dirty := cache_entry.Dirty; + entry.DataBlk := cache_entry.DataBlk; + entry.FromL2 := true; + } + + action(fi_L2ToL1, "fi", desc="L2 to L1 inst fill") { + enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L2_to_L1; + out_msg.Dest := CacheId:L1I; + } + } + + action(f0_L2ToL1, "f0", desc="L2 to L1 data fill") { + enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L2_to_L1; + out_msg.Dest := CacheId:L1D0; + } + } + + action(f1_L2ToL1, "f1", desc="L2 to L1 data fill") { + enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L2_to_L1; + out_msg.Dest := CacheId:L1D1; + } + } + + action(wi_writeIcache, "wi", desc="write data to icache (and l2)") { + peek(responseToCore_in, ResponseMsg) { + Entry entry := getICacheEntry(address); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + entry.DataBlk := in_msg.DataBlk; + entry.Dirty := in_msg.Dirty; + cache_entry.DataBlk := 
in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  // Copies the data block and dirty bit of the response at the head of
+  // responseToCore_in into both the L1D0 entry and the L2 entry. Both entries
+  // must already exist.
+  action(w0_writeDcache, "w0", desc="write data to dcache 0 (and l2)") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getL1CacheEntry(address, 0);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      DPRINTF(ProtocolTrace, "CP writeD0: address %s, data: %s\n", address, in_msg.DataBlk);
+      entry.DataBlk := in_msg.DataBlk;
+      entry.Dirty := in_msg.Dirty;
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  // Same as w0_writeDcache for the L1D1 entry (without the trace print).
+  action(w1_writeDcache, "w1", desc="write data to dcache 1 (and l2)") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getL1CacheEntry(address, 1);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      entry.DataBlk := in_msg.DataBlk;
+      entry.Dirty := in_msg.Dirty;
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  // Sends a StaleNotif control response to the directory. No field of the
+  // peeked response is read.
+  action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+    peek(responseToCore_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:StaleNotif;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  // Sends the writeback data held in the TBE to the directory as a CPUData
+  // response. Data, dirty bit and NbReqShared all come from the TBE; no field
+  // of the peeked response is read.
+  action(wb_data, "wb", desc="write back data") {
+    peek(responseToCore_in, ResponseMsg) {
+      // Every payload field below is read from the TBE, so require it up
+      // front rather than dereferencing an unset TBE.
+      assert(is_valid(tbe));
+      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:CPUData;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.Dirty := tbe.Dirty;
+        if (tbe.Shared) {
+          out_msg.NbReqShared := true;
+        } else {
+          out_msg.NbReqShared := false;
+        }
+        out_msg.State := CoherenceState:Shared; // faux info
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  // Probe response without data: Dirty=false, Hit=false, Ntsl=true, State=NA.
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  // Fills in the same fields with the same values as pi_sendProbeResponseInv;
+  // only the order of the Ntsl and Hit assignments differs.
+  action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.Dirty := false;
+      out_msg.Ntsl := true;
+      out_msg.Hit := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(ph_sendProbeResponseHit, "ph", desc="send probe ack PrbShrData, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok?
probably not for multisocket + assert(addressInCore(address) || is_valid(tbe)); + out_msg.Dirty := false; // only true if sending back data i think + out_msg.Hit := true; + out_msg.Ntsl := false; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(pb_sendProbeResponseBackprobe, "pb", desc="send probe ack PrbShrData, no data, check for L1 residence") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + if (addressInCore(address)) { + out_msg.Hit := true; + } else { + out_msg.Hit := false; + } + out_msg.Dirty := false; // not sending back data, so def. not dirty + out_msg.Ntsl := false; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? 
probably not for multisocket + out_msg.DataBlk := cache_entry.DataBlk; + assert(cache_entry.Dirty); + out_msg.Dirty := true; + out_msg.Hit := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + out_msg.DataBlk := cache_entry.DataBlk; + assert(cache_entry.Dirty); + out_msg.Dirty := true; + out_msg.Hit := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.DataBlk := tbe.DataBlk; + assert(tbe.Dirty); + out_msg.Dirty := true; + out_msg.Hit := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(s_setSharedFlip, "s", desc="hit by shared probe, status may be different") { + assert(is_valid(tbe)); + tbe.Shared := true; + } + + action(uu_sendUnblock, "uu", desc="state changed, unblock") { + enqueue(unblockNetwork_out, UnblockMsg, issue_latency) { + out_msg.addr := address; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Unblock_Control; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(l2m_profileMiss, "l2m", desc="l2m miss profile") { + ++L2cache.demand_misses; + } + + 
// Miss profiling: each action increments demand_misses on one cache object.
action(l10m_profileMiss, "l10m", desc="l10m miss profile") {
  ++L1D0cache.demand_misses;
}

action(l11m_profileMiss, "l11m", desc="l11m miss profile") {
  ++L1D1cache.demand_misses;
}

// Shorthand corrected from "l1lm" so it agrees with the action name and desc.
action(l1im_profileMiss, "l1im", desc="l1im miss profile") {
  ++L1Icache.demand_misses;
}

// Queue recycling: each action calls recycle() on one in-port, passing the
// current clock edge and recycle_latency converted to ticks.
action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
  probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

action(xx_recycleResponseQueue, "xx", desc="recycle response queue") {
  responseToCore_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
  mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}

// END ACTIONS

// BEGIN TRANSITIONS

// transitions from base

// Load miss from cluster 0 in I: allocate L1D0 and L2, invalidate the other
// cluster and the icache, then issue RdBlk.
transition(I, C0_Load_L1miss, I_E0S) {L1D0TagArrayRead, L2TagArrayRead} {
  // track misses, if implemented
  // since in I state, L2 miss as well
  l2m_profileMiss;
  l10m_profileMiss;
  a0_allocateL1D;
  a2_allocateL2;
  i1_invCluster;
  ii_invIcache;
  n_issueRdBlk;
  p_popMandatoryQueue;
}

// Cluster-1 counterpart of the transition above.
transition(I, C1_Load_L1miss, I_E1S) {L1D1TagArrayRead, L2TagArrayRead} {
  // track misses, if implemented
  // since in I state, L2 miss as well
  l2m_profileMiss;
  l11m_profileMiss;
  a1_allocateL1D;
  a2_allocateL2;
  i0_invCluster;
  ii_invIcache;
  n_issueRdBlk;
  p_popMandatoryQueue;
}

// Instruction fetch miss in I: allocate L1I and L2, invalidate both data
// clusters, then issue RdBlkS.
transition(I, Ifetch0_L1miss, S0) {L1ITagArrayRead,L2TagArrayRead} {
  // track misses, if implemented
  // L2 miss as well
  l2m_profileMiss;
  l1im_profileMiss;
  ai_allocateL1I;
  a2_allocateL2;
  ib_invBothClusters;
  nS_issueRdBlkS;
  p_popMandatoryQueue;
}

transition(I, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} {
  // track misses, if implemented
  // L2 miss as well
  l2m_profileMiss;
  l1im_profileMiss;
  ai_allocateL1I;
  a2_allocateL2;
  ib_invBothClusters;
  nS_issueRdBlkS;
  p_popMandatoryQueue;
}

transition(I, C0_Store_L1miss, I_M0)
{L1D0TagArrayRead, L2TagArrayRead} {
  l2m_profileMiss;
  l10m_profileMiss;
  a0_allocateL1D;
  a2_allocateL2;
  i1_invCluster;
  ii_invIcache;
  nM_issueRdBlkM;
  p_popMandatoryQueue;
}

// Cluster-1 store miss from I. The request type was L1D0TagArrayRead; it now
// names the L1D1 tag array, matching the a1_allocateL1D action below and the
// C1 load-miss transition from I.
transition(I, C1_Store_L1miss, I_M1) {L1D1TagArrayRead, L2TagArrayRead} {
  l2m_profileMiss;
  l11m_profileMiss;
  a1_allocateL1D;
  a2_allocateL2;
  i0_invCluster;
  ii_invIcache;
  nM_issueRdBlkM;
  p_popMandatoryQueue;
}

// Load / ifetch misses in S: allocate the L1 entry and schedule an
// L2-to-L1 fill (fX_L2ToL1) instead of issuing a new request.
transition(S, C0_Load_L1miss, S_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
  l10m_profileMiss;
  a0_allocateL1D;
  f0_L2ToL1;
  mru_setMRU;
  p_popMandatoryQueue;
}

transition(S, C1_Load_L1miss, S_F1) {L1D1TagArrayRead,L2TagArrayRead, L2DataArrayRead} {
  l11m_profileMiss;
  a1_allocateL1D;
  f1_L2ToL1;
  mru_setMRU;
  p_popMandatoryQueue;
}

transition(S, Ifetch0_L1miss, Si_F0) {L1ITagArrayRead, L2TagArrayRead, L2DataArrayRead} {
  l1im_profileMiss;
  ai_allocateL1I;
  fi_L2ToL1;
  mru_setMRU;
  p_popMandatoryQueue;
}

transition(S, Ifetch1_L1miss, Si_F1) {L1ITagArrayRead,L2TagArrayRead, L2DataArrayRead} {
  l1im_profileMiss;
  ai_allocateL1I;
  fi_L2ToL1;
  mru_setMRU;
  p_popMandatoryQueue;
}

// Stores in S (L1 hit or miss): invalidate the other cluster and the icache,
// then issue RdBlkM.
transition({S}, {C0_Store_L1hit, C0_Store_L1miss}, S_M0) {L1D0TagArrayRead, L2TagArrayRead} {
  l2m_profileMiss;
  l10m_profileMiss;
  a0_allocateL1D;
  mruD0_setD0cacheMRU;
  i1_invCluster;
  ii_invIcache;
  nM_issueRdBlkM;
  p_popMandatoryQueue;
}

transition({S}, {C1_Store_L1hit, C1_Store_L1miss}, S_M1) {L1D1TagArrayRead, L2TagArrayRead} {
  l2m_profileMiss;
  l11m_profileMiss;
  a1_allocateL1D;
  mruD1_setD1cacheMRU;
  i0_invCluster;
  ii_invIcache;
  nM_issueRdBlkM;
  p_popMandatoryQueue;
}

transition(Es, C0_Load_L1miss, Es_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} { // can this be folded with S_F?
+ a0_allocateL1D; + l10m_profileMiss; + f0_L2ToL1; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(Es, C1_Load_L1miss, Es_F1) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} { // can this be folded with S_F? + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(Es, Ifetch0_L1miss, S0) {L1ITagArrayRead, L1ITagArrayWrite, L2TagArrayRead, L2TagArrayWrite} { + l1im_profileMiss; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + ib_invBothClusters; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + transition(Es, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} { + l1im_profileMiss; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + ib_invBothClusters; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + // THES SHOULD NOT BE INSTANTANEOUS BUT OH WELL FOR NOW + transition(Es, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} { + a0_allocateL1D; + i1_invCluster; + s0_storeDone; // instantaneous L1/L2 dirty - no writethrough delay + mruD0_setD0cacheMRU; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(Es, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} { + a1_allocateL1D; + i0_invCluster; + s1_storeDone; + mruD1_setD1cacheMRU; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(E0, C0_Load_L1miss, E0_F) {L1D0TagArrayRead,L2TagArrayRead, L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(E0, C1_Load_L1miss, E0_Es) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(E0, Ifetch0_L1miss, S0) {L2TagArrayRead, L1ITagArrayRead} { + l2m_profileMiss; // permissions miss, still issue RdBlkS + l1im_profileMiss; + i2_invL2; + 
ai_allocateL1I;
  a2_allocateL2;
  i0_invCluster;
  nS_issueRdBlkS;
  p_popMandatoryQueue;
}

// Ifetch miss while cluster 0 holds the line exclusively: drop the L2 copy,
// invalidate cluster 0, and re-request the line with RdBlkS.
transition(E0, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead} {
  l2m_profileMiss; // permissions miss, still issue RdBlkS
  l1im_profileMiss;
  i2_invL2;
  ai_allocateL1I;
  a2_allocateL2;
  i0_invCluster;
  nS_issueRdBlkS;
  p_popMandatoryQueue;
}

// Store by the owning cluster: completes immediately (s0_storeDone).
transition(E0, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
  a0_allocateL1D;
  s0_storeDone;
  mruD0_setD0cacheMRU;
  mru_setMRU;
  p_popMandatoryQueue;
}

// Store by the other cluster. The request list named L1D1TagArrayWrite twice;
// the duplicate is now L1D1DataArrayWrite, mirroring the E1/C0 store-miss
// transition (tag read, tag write, data write on the storing cluster's L1D).
transition(E0, C1_Store_L1miss, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
  l11m_profileMiss;
  a1_allocateL1D;
  i0_invCluster;
  s1_storeDone;
  mru_setMRU;
  p_popMandatoryQueue;
}

transition(E1, C1_Load_L1miss, E1_F) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
  l11m_profileMiss;
  a1_allocateL1D;
  f1_L2ToL1;
  mru_setMRU;
  p_popMandatoryQueue;
}

// Cluster-0 load miss while cluster 1 holds the line. This used to call
// l11m_profileMiss; the miss is in L1D0 (a0_allocateL1D, f0_L2ToL1), so the
// L1D0 counter is the one to bump, mirroring transition(E0, C1_Load_L1miss).
transition(E1, C0_Load_L1miss, E1_Es) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
  l10m_profileMiss;
  a0_allocateL1D;
  f0_L2ToL1;
  mru_setMRU;
  p_popMandatoryQueue;
}

transition(E1, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead} {
  l2m_profileMiss; // permissions miss, still issue RdBlkS
  l1im_profileMiss;
  i2_invL2;
  ai_allocateL1I;
  a2_allocateL2;
  i1_invCluster;
  nS_issueRdBlkS;
  p_popMandatoryQueue;
}

transition(E1, Ifetch0_L1miss, S0) {L2TagArrayRead, L1ITagArrayRead} {
  l2m_profileMiss; // permissions miss, still issue RdBlkS
  l1im_profileMiss;
  i2_invL2;
  ai_allocateL1I;
  a2_allocateL2;
  i1_invCluster;
  nS_issueRdBlkS;
  p_popMandatoryQueue;
}

// Store by the owning cluster. L1D1DataArrayWrite added so the request list
// matches the E0/C0 counterpart, which lists L1D0DataArrayWrite.
transition(E1, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite} {
  a1_allocateL1D;
  s1_storeDone;
  mruD1_setD1cacheMRU;
mru_setMRU; + p_popMandatoryQueue; + } + + transition(E1, C0_Store_L1miss, M0) {L1D0TagArrayRead, L2TagArrayRead, L2TagArrayWrite, L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite} { + l10m_profileMiss; + a0_allocateL1D; + i1_invCluster; + s0_storeDone; + mru_setMRU; + p_popMandatoryQueue; + } + + transition({O}, {C0_Store_L1hit, C0_Store_L1miss}, O_M0) {L1D0TagArrayRead,L2TagArrayRead} { + l2m_profileMiss; // permissions miss, still issue CtoD + l10m_profileMiss; + a0_allocateL1D; + mruD0_setD0cacheMRU; + i1_invCluster; + ii_invIcache; + nM_issueRdBlkM; + p_popMandatoryQueue; + } + + transition({O}, {C1_Store_L1hit, C1_Store_L1miss}, O_M1) {L1D1TagArrayRead, L2TagArrayRead} { + l2m_profileMiss; // permissions miss, still issue RdBlkS + l11m_profileMiss; + a1_allocateL1D; + mruD1_setD1cacheMRU; + i0_invCluster; + ii_invIcache; + nM_issueRdBlkM; + p_popMandatoryQueue; + } + + transition(O, C0_Load_L1miss, O_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(O, C1_Load_L1miss, O_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(Ms, C0_Load_L1miss, Ms_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(Ms, C1_Load_L1miss, Ms_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + mru_setMRU; + p_popMandatoryQueue; + } + + transition({Ms, M0, M1, O}, Ifetch0_L1miss, MO_S0) {L1ITagArrayRead, L2DataArrayRead, L2TagArrayRead} { + l2m_profileMiss; // permissions miss + l1im_profileMiss; + ai_allocateL1I; + t_allocateTBE; + ib_invBothClusters; + vd_victim; +// i2_invL2; + p_popMandatoryQueue; + } + + transition({Ms, M0, M1, O}, Ifetch1_L1miss, MO_S1) {L1ITagArrayRead, 
L2TagArrayRead, L2DataArrayRead } {
  l2m_profileMiss; // permissions miss
  l1im_profileMiss;
  ai_allocateL1I;
  t_allocateTBE;
  ib_invBothClusters;
  vd_victim;
  // i2_invL2;
  p_popMandatoryQueue;
}

// Stores in Ms: invalidate the other cluster and complete the store at once.
transition(Ms, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
  a0_allocateL1D;
  i1_invCluster;
  s0_storeDone;
  mruD0_setD0cacheMRU;
  mru_setMRU;
  p_popMandatoryQueue;
}

transition(Ms, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
  a1_allocateL1D;
  i0_invCluster;
  s1_storeDone;
  mruD1_setD1cacheMRU;
  mru_setMRU;
  p_popMandatoryQueue;
}

transition(M0, C0_Load_L1miss, M0_F) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
  l10m_profileMiss;
  a0_allocateL1D;
  f0_L2ToL1;
  mru_setMRU;
  p_popMandatoryQueue;
}

// Cluster-1 load miss in M0. The request type was L1D0TagArrayRead; it now
// names L1D1, the cache this transition allocates in (a1_allocateL1D),
// mirroring transition(M1, C0_Load_L1miss) which reads the L1D0 tags.
transition(M0, C1_Load_L1miss, M0_Ms) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} {
  l11m_profileMiss;
  a1_allocateL1D;
  f1_L2ToL1;
  mru_setMRU;
  p_popMandatoryQueue;
}

// Store by the owning cluster: state is unchanged (no next state given).
transition(M0, {C0_Store_L1hit, C0_Store_L1miss}) {L1D0TagArrayRead,L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayRead} {
  a0_allocateL1D;
  s0_storeDone;
  mruD0_setD0cacheMRU;
  mru_setMRU;
  p_popMandatoryQueue;
}

// Store by cluster 1 in M0. The data-array request was L1D0DataArrayWrite; the
// store completes in L1D1 (a1_allocateL1D, s1_storeDone), so it is now
// L1D1DataArrayWrite, mirroring transition(M1, {C0_Store_*}, M0).
transition(M0, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2DataArrayWrite, L2TagArrayRead, L2TagArrayWrite} {
  a1_allocateL1D;
  i0_invCluster;
  s1_storeDone;
  mruD1_setD1cacheMRU;
  mru_setMRU;
  p_popMandatoryQueue;
}

transition(M1, C0_Load_L1miss, M1_Ms) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
  l10m_profileMiss;
  a0_allocateL1D;
  f0_L2ToL1;
  mru_setMRU;
  p_popMandatoryQueue;
}

// Cluster-1 load miss in M1. l11m_profileMiss added: every other *_Load_L1miss
// transition from a base state profiles the L1 miss (compare M0_F above).
transition(M1, C1_Load_L1miss, M1_F) {L1D1TagArrayRead,L2TagArrayRead, L2DataArrayRead} {
  l11m_profileMiss;
  a1_allocateL1D;
  f1_L2ToL1;
  mru_setMRU;
p_popMandatoryQueue;
}

transition(M1, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
  a0_allocateL1D;
  i1_invCluster;
  s0_storeDone;
  mruD0_setD0cacheMRU;
  mru_setMRU;
  p_popMandatoryQueue;
}

// Store by the owning cluster: state is unchanged (no next state given).
// L1D1DataArrayWrite added so the request list matches the M0/C0 counterpart,
// which lists L1D0DataArrayWrite.
transition(M1, {C1_Store_L1hit, C1_Store_L1miss}) {L1D1TagArrayRead, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite} {
  a1_allocateL1D;
  s1_storeDone;
  mruD1_setD1cacheMRU;
  mru_setMRU;
  p_popMandatoryQueue;
}

// end transitions from base

// Begin simple hit transitions
// An L1 hit completes the access, updates that L1's MRU state, and pops the
// request; no next state is given, so the state is unchanged.
transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es,
            Ms_F1, M0_Ms}, C0_Load_L1hit) {L1D0TagArrayRead, L1D0DataArrayRead} {
  // track hits, if implemented
  l0_loadDone;
  mruD0_setD0cacheMRU;
  p_popMandatoryQueue;
}

transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es,
            Ms_F0, M1_Ms}, C1_Load_L1hit) {L1D1TagArrayRead, L1D1DataArrayRead} {
  // track hits, if implemented
  l1_loadDone;
  mruD1_setD1cacheMRU;
  p_popMandatoryQueue;
}

transition({S, S_C, S_F0, S_F1, S_F}, Ifetch0_L1hit) {L1ITagArrayRead, L1IDataArrayRead} {
  // track hits, if implemented
  il0_loadDone;
  mruI_setIcacheMRU;
  p_popMandatoryQueue;
}

// The data-array request was L1IDataArrayWrite; an ifetch hit only reads the
// icache, so it is now L1IDataArrayRead like the Ifetch0_L1hit transition.
transition({S, S_C, S_F0, S_F1, S_F}, Ifetch1_L1hit) {L1ITagArrayRead, L1IDataArrayRead} {
  // track hits, if implemented
  il1_loadDone;
  mruI_setIcacheMRU;
  p_popMandatoryQueue;
}

// end simple hit transitions

// Transitions from transient states

// recycles
// In these transient states the request is put back on the mandatory queue
// (zz_recycleMandatoryQueue) rather than handled.
transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
            IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F,
            E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, C0_Load_L1hit) {} {
  zz_recycleMandatoryQueue;
}

transition({IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M1,
            O_M1, S0, S1, I_C, S0_C, S1_C, S_C}, C0_Load_L1miss) {} {
  zz_recycleMandatoryQueue;
}

transition({I_M1, I_M0M1,
I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
            IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F,
            E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, C1_Load_L1hit) {} {
  zz_recycleMandatoryQueue;
}

// Every transition on this line is a recycle: in the listed transient states
// the request is put back on the mandatory queue instead of being handled.
transition({IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M0,
            O_M0, S0, S1, I_C, S0_C, S1_C, S_C}, C1_Load_L1miss) {} {
  zz_recycleMandatoryQueue;
}

transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, {Ifetch0_L1hit, Ifetch1_L1hit}) {} {
  zz_recycleMandatoryQueue;
}

transition({I_M0, I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES,
            IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, ES_I, MO_I, S_F0, S_F1, S_F,
            O_F0, O_F1, O_F, S_M0, S_M1, O_M0, O_M1, Es_F0, Es_F1, Es_F, E0_F,
            E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_F, M1_Ms, I_C,
            S_C}, {Ifetch0_L1miss, Ifetch1_L1miss}) {} {
  zz_recycleMandatoryQueue;
}

transition({I_E1S, IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, S_F1, O_F1,
            Si_F0, Si_F1, S_M1, O_M1, S0, S1, Es_F1, E1_F, E0_Es, Ms_F1, M0_Ms,
            M1_F, I_C, S0_C, S1_C, S_C}, {C0_Store_L1miss}) {} {
  zz_recycleMandatoryQueue;
}

// The comma between MO_S1 and S_F0 was missing; restored so this list is
// punctuated like the C0_Store_L1miss list above.
transition({I_E0S, IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, S_F0, O_F0,
            Si_F0, Si_F1, S_M0, O_M0, S0, S1, Es_F0, E0_F, E1_Es, Ms_F0, M0_F,
            M1_Ms, I_C, S0_C, S1_C, S_C}, {C1_Store_L1miss}) {} {
  zz_recycleMandatoryQueue;
}

transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
            IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M0,
            O_M0, Es_F0, Es_F1, Es_F, E0_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F,
            M0_F, M0_Ms, M1_Ms}, {C0_Store_L1hit}) {} {
  zz_recycleMandatoryQueue;
}

transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
            IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M1,
            O_M1, Es_F0, Es_F1, Es_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F,
            M0_Ms, M1_F, M1_Ms}, {C1_Store_L1hit}) {} {
  zz_recycleMandatoryQueue;
}

transition({I_M0,
I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES, + IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F, + E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, L1D0_Repl) {} { + zz_recycleMandatoryQueue; + } + + transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES, + IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F, + E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, L1D1_Repl) {} { + zz_recycleMandatoryQueue; + } + + transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, L1I_Repl) {} { + zz_recycleMandatoryQueue; + } + + transition({S_C, S0_C, S1_C, S0, S1, Si_F0, Si_F1, I_M0, I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, S_M0, O_M0, S_M1, O_M1, Es_F0, Es_F1, Es_F, E0_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_F, M1_Ms, MO_S0, MO_S1, IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, L2_Repl) {} { + zz_recycleMandatoryQueue; + } + + transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, {NB_AckS, + PrbInvData, PrbInv, PrbShrData}) {} { + yy_recycleProbeQueue; // these should be resolved soon, but I didn't want to add more states, though technically they could be solved now, and probes really could be solved but i don't think it's really necessary. + } + + transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES}, NB_AckE) {} { + xx_recycleResponseQueue; // these should be resolved soon, but I didn't want to add more states, though technically they could be solved now, and probes really could be solved but i don't think it's really necessary. 
+ } + + transition({E0_Es, E1_F, Es_F1}, C0_Load_L1miss, Es_F) {L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(S_F1, C0_Load_L1miss, S_F) {L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(O_F1, C0_Load_L1miss, O_F) {L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition({Ms_F1, M0_Ms, M1_F}, C0_Load_L1miss, Ms_F) {L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(I_M0, C1_Load_L1miss, I_M0Ms) {} { + l2m_profileMiss; + l11m_profileMiss; + a1_allocateL1D; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(I_M1, C0_Load_L1miss, I_M1Ms) {} { + l2m_profileMiss; + l10m_profileMiss; + a0_allocateL1D; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(I_M0, C1_Store_L1miss, I_M0M1) {} { + l2m_profileMiss; + l11m_profileMiss; + a1_allocateL1D; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(I_M1, C0_Store_L1miss, I_M1M0) {} { + l2m_profileMiss; + l10m_profileMiss; + a0_allocateL1D; + mru_setMRU; + p_popMandatoryQueue; + } + + transition(I_E0S, C1_Load_L1miss, I_ES) {} { + l2m_profileMiss; + l11m_profileMiss; + a1_allocateL1D; + p_popMandatoryQueue; + } + + transition(I_E1S, C0_Load_L1miss, I_ES) {} { + l2m_profileMiss; + l10m_profileMiss; + a0_allocateL1D; + p_popMandatoryQueue; + } + + transition({E1_Es, E0_F, Es_F0}, C1_Load_L1miss, Es_F) {L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(S_F0, C1_Load_L1miss, S_F) {L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(O_F0, C1_Load_L1miss, O_F) {L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition({Ms_F0, M1_Ms, M0_F}, C1_Load_L1miss, Ms_F) { L2DataArrayRead} { + l11m_profileMiss; + 
a1_allocateL1D;
  f1_L2ToL1;
  p_popMandatoryQueue;
}

// L1 replacements: only the affected L1 is invalidated; no next state is
// given, so the state is unchanged.
transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es, Ms_F1, M0_Ms}, L1D0_Repl) {L1D0TagArrayRead} {
  i0_invCluster;
}

transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es, Ms_F0, M1_Ms}, L1D1_Repl) {L1D1TagArrayRead} {
  i1_invCluster;
}

transition({S, S_C, S_F0, S_F1}, L1I_Repl) {L1ITagArrayRead} {
  ii_invIcache;
}

// L2 replacement from S/E0/E1/Es: notify both CPUs, allocate a TBE, issue
// vc_victim, and invalidate every level.
transition({S, E0, E1, Es}, L2_Repl, ES_I) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead, L1D1TagArrayRead} {
  forward_eviction_to_cpu0;
  forward_eviction_to_cpu1;
  t_allocateTBE;
  vc_victim;
  ib_invBothClusters;
  i2_invL2;
  ii_invIcache;
}

// L2 replacement from Ms/M0/M1/O: same sequence, but with vd_victim.
transition({Ms, M0, M1, O}, L2_Repl, MO_I) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead, L1D1TagArrayRead} {
  forward_eviction_to_cpu0;
  forward_eviction_to_cpu1;
  t_allocateTBE;
  vd_victim;
  i2_invL2;
  ib_invBothClusters; // nothing will happen for D0 on M1, vice versa
}

// NB_AckS for a pending instruction fetch. These four transitions run
// wi_writeIcache, which writes the L1I and L2 entries, yet they requested the
// L1D0/L1D1 arrays. They now request the L1I arrays, as the Si_F0/Si_F1
// icache-fill transitions do.
// NOTE(review): confirm no L1D resource was being reserved here on purpose.
transition(S0, NB_AckS, S) {L1IDataArrayWrite, L1ITagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
  wi_writeIcache;
  xi0_loadDone;
  uu_sendUnblock;
  pr_popResponseQueue;
}

transition(S1, NB_AckS, S) {L1IDataArrayWrite, L1ITagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
  wi_writeIcache;
  xi1_loadDone;
  uu_sendUnblock;
  pr_popResponseQueue;
}

transition(S0_C, NB_AckS, S_C) {L1IDataArrayWrite, L2DataArrayWrite} {
  wi_writeIcache;
  xi0_loadDone;
  uu_sendUnblock;
  pr_popResponseQueue;
}

transition(S1_C, NB_AckS, S_C) {L1IDataArrayWrite, L2DataArrayWrite} {
  wi_writeIcache;
  xi1_loadDone;
  uu_sendUnblock;
  pr_popResponseQueue;
}

// NB_AckM for a pending store from I: write the response data into the
// storing cluster's L1D and the L2, complete the store, and unblock.
transition(I_M0, NB_AckM, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} {
  w0_writeDcache;
  xs0_storeDone;
  uu_sendUnblock;
  pr_popResponseQueue;
}

transition(I_M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
  w1_writeDcache;
xs1_storeDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + // THESE MO->M1 should not be instantaneous but oh well for now. + transition(I_M0M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + w0_writeDcache; + xs0_storeDone; + uu_sendUnblock; + i0_invCluster; + s1_storeDone; + pr_popResponseQueue; + } + + transition(I_M1M0, NB_AckM, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + w1_writeDcache; + xs1_storeDone; + uu_sendUnblock; + i1_invCluster; + s0_storeDone; + pr_popResponseQueue; + } + + // Above shoudl be more like this, which has some latency to xfer to L1 + transition(I_M0Ms, NB_AckM, M0_Ms) {L1D0DataArrayWrite,L2DataArrayWrite} { + w0_writeDcache; + xs0_storeDone; + uu_sendUnblock; + f1_L2ToL1; + pr_popResponseQueue; + } + + transition(I_M1Ms, NB_AckM, M1_Ms) {L1D1DataArrayWrite, L2DataArrayWrite} { + w1_writeDcache; + xs1_storeDone; + uu_sendUnblock; + f0_L2ToL1; + pr_popResponseQueue; + } + + transition(I_E0S, NB_AckE, E0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + w0_writeDcache; + xl0_loadDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_E1S, NB_AckE, E1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + w1_writeDcache; + xl1_loadDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_ES, NB_AckE, Es) {L1D1DataArrayWrite, L1D1TagArrayWrite, L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite } { + w0_writeDcache; + xl0_loadDone; + w1_writeDcache; + xl1_loadDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_E0S, NB_AckS, S) {L1D0DataArrayWrite, L1D0TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} { + w0_writeDcache; + xl0_loadDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_E1S, NB_AckS, S) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} { + w1_writeDcache; + xl1_loadDone; + uu_sendUnblock; + 
pr_popResponseQueue; + } + + transition(I_ES, NB_AckS, S) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} { + w0_writeDcache; + xl0_loadDone; + w1_writeDcache; + xl1_loadDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(S_F0, L2_to_L1D0, S) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + mru_setMRU; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(S_F1, L2_to_L1D1, S) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + mru_setMRU; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(Si_F0, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + ci_copyL2ToL1; + mru_setMRU; + il0_loadDone; + pt_popTriggerQueue; + } + + transition(Si_F1, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + ci_copyL2ToL1; + mru_setMRU; + il1_loadDone; + pt_popTriggerQueue; + } + + transition(S_F, L2_to_L1D0, S_F1) { L1D0DataArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + mru_setMRU; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(S_F, L2_to_L1D1, S_F0) { L1D1DataArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + mru_setMRU; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(O_F0, L2_to_L1D0, O) { L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + mru_setMRU; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(O_F1, L2_to_L1D1, O) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + mru_setMRU; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(O_F, L2_to_L1D0, O_F1) { L1D0DataArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + mru_setMRU; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(O_F, L2_to_L1D1, O_F0) { L1D1DataArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + mru_setMRU; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(M1_F, L2_to_L1D1, M1) 
{L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
  c1_copyL2ToL1;
  mru_setMRU;
  l1_loadDone;
  pt_popTriggerQueue;
}

// L2-to-L1 fill completions: copy the L2 data into the target L1D entry,
// complete the load, and pop the trigger queue.
transition(M0_F, L2_to_L1D0, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
  c0_copyL2ToL1;
  mru_setMRU;
  l0_loadDone;
  pt_popTriggerQueue;
}

transition(Ms_F0, L2_to_L1D0, Ms) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
  c0_copyL2ToL1;
  mru_setMRU;
  l0_loadDone;
  pt_popTriggerQueue;
}

transition(Ms_F1, L2_to_L1D1, Ms) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
  c1_copyL2ToL1;
  mru_setMRU;
  l1_loadDone;
  pt_popTriggerQueue;
}

// Both fills outstanding (Ms_F): one arrives, the other stays pending.
transition(Ms_F, L2_to_L1D0, Ms_F1) {L1D0DataArrayWrite, L2DataArrayRead} {
  c0_copyL2ToL1;
  mru_setMRU;
  l0_loadDone;
  pt_popTriggerQueue;
}

// The request type was L1IDataArrayWrite; this transition fills L1D1
// (c1_copyL2ToL1), so it is now L1D1DataArrayWrite, mirroring the L2_to_L1D0
// transition above.
transition(Ms_F, L2_to_L1D1, Ms_F0) {L1D1DataArrayWrite, L2DataArrayRead} {
  c1_copyL2ToL1;
  mru_setMRU;
  l1_loadDone;
  pt_popTriggerQueue;
}

transition(M1_Ms, L2_to_L1D0, Ms) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
  c0_copyL2ToL1;
  mru_setMRU;
  l0_loadDone;
  pt_popTriggerQueue;
}

transition(M0_Ms, L2_to_L1D1, Ms) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
  c1_copyL2ToL1;
  mru_setMRU;
  l1_loadDone;
  pt_popTriggerQueue;
}

transition(Es_F0, L2_to_L1D0, Es) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
  c0_copyL2ToL1;
  mru_setMRU;
  l0_loadDone;
  pt_popTriggerQueue;
}

transition(Es_F1, L2_to_L1D1, Es) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
  c1_copyL2ToL1;
  mru_setMRU;
  l1_loadDone;
  pt_popTriggerQueue;
}

transition(Es_F, L2_to_L1D0, Es_F1) {L2TagArrayRead, L2DataArrayRead} {
  c0_copyL2ToL1;
  mru_setMRU;
  l0_loadDone;
  pt_popTriggerQueue;
}

transition(Es_F, L2_to_L1D1, Es_F0) {L2TagArrayRead, L2DataArrayRead} {
  c1_copyL2ToL1;
mru_setMRU; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(E0_F, L2_to_L1D0, E0) {L2TagArrayRead, L2DataArrayRead} { + c0_copyL2ToL1; + mru_setMRU; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(E1_F, L2_to_L1D1, E1) {L2TagArrayRead, L2DataArrayRead} { + c1_copyL2ToL1; + mru_setMRU; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(E1_Es, L2_to_L1D0, Es) {L2TagArrayRead, L2DataArrayRead} { + c0_copyL2ToL1; + mru_setMRU; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(E0_Es, L2_to_L1D1, Es) {L2TagArrayRead, L2DataArrayRead} { + c1_copyL2ToL1; + mru_setMRU; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(IF_E0S, L2_to_L1D0, I_E0S) {} { + pt_popTriggerQueue; + } + + transition(IF_E1S, L2_to_L1D1, I_E1S) {} { + pt_popTriggerQueue; + } + + transition(IF_ES, L2_to_L1D0, IF1_ES) {} { + pt_popTriggerQueue; + } + + transition(IF_ES, L2_to_L1D1, IF0_ES) {} { + pt_popTriggerQueue; + } + + transition(IF0_ES, L2_to_L1D0, I_ES) {} { + pt_popTriggerQueue; + } + + transition(IF1_ES, L2_to_L1D1, I_ES) {} { + pt_popTriggerQueue; + } + + transition(F_S0, L2_to_L1I, S0) {} { + pt_popTriggerQueue; + } + + transition(F_S1, L2_to_L1I, S1) {} { + pt_popTriggerQueue; + } + + transition({S_M0, O_M0}, NB_AckM, M0) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + mru_setMRU; + xs0_storeDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition({S_M1, O_M1}, NB_AckM, M1) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + mru_setMRU; + xs1_storeDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(MO_I, NB_AckWB, I) {L2TagArrayWrite} { + wb_data; + d_deallocateTBE; + pr_popResponseQueue; + } + + transition(ES_I, NB_AckWB, I) {L2TagArrayWrite} { + wb_data; + d_deallocateTBE; + pr_popResponseQueue; + } + + transition(MO_S0, NB_AckWB, S0) {L2TagArrayWrite} { + wb_data; + i2_invL2; + a2_allocateL2; + d_deallocateTBE; // FOO + nS_issueRdBlkS; + pr_popResponseQueue; + } + + 
transition(MO_S1, NB_AckWB, S1) {L2TagArrayWrite} {
+    wb_data;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE; // FOO
+    nS_issueRdBlkS;
+    pr_popResponseQueue;
+  }
+
+  // Writeback cancel "ack"
+  // In the *_C states the NB_AckWB is answered with a stale notification
+  // instead of writeback data.
+  transition(I_C, NB_AckWB, I) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(S0_C, NB_AckWB, S0) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    pr_popResponseQueue;
+  }
+
+  transition(S1_C, NB_AckWB, S1) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    pr_popResponseQueue;
+  }
+
+  transition(S_C, NB_AckWB, S) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    pr_popResponseQueue;
+  }
+
+  // Begin Probe Transitions
+  // Every probe transition finishes with pp_popProbeQueue. Invalidating
+  // probes in stable states forward the eviction to both CPUs, send a probe
+  // response, and invalidate the L2 and both clusters.
+
+  transition({Ms, M0, M1, O}, PrbInvData, I) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    i2_invL2;
+    ib_invBothClusters;
+    pp_popProbeQueue;
+  }
+
+  transition({Es, E0, E1, S, I}, PrbInvData, I) {L2TagArrayRead, L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    ib_invBothClusters;
+    ii_invIcache; // only relevant for S
+    pp_popProbeQueue;
+  }
+
+  transition(S_C, PrbInvData, I_C) {L2TagArrayWrite} {
+    t_allocateTBE;
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbInvData, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms, M0, M1, O, Es, E0, E1, S, I}, PrbInv, I) {L2TagArrayRead, L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2; // nothing will happen in I
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(S_C, PrbInv, I_C) {L2TagArrayWrite} {
+    t_allocateTBE;
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbInv, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  // Downgrading (shared-data) probes in stable states.
+  transition({Ms, M0, M1, O}, PrbShrData, O) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({Es, E0, E1, S}, PrbShrData, S) {L2TagArrayRead, L2TagArrayWrite} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition(S_C, PrbShrData) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({I, I_C}, PrbShrData) {L2TagArrayRead} {
+    pb_sendProbeResponseBackprobe;
+    pp_popProbeQueue;
+  }
+
+  // Probes arriving in the I_* transient states.
+  transition({I_M0, I_E0S}, {PrbInv, PrbInvData}) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters; // must invalidate current data (only relevant for I_M0)
+    a0_allocateL1D; // but make sure there is room for incoming data when it arrives
+    pp_popProbeQueue;
+  }
+
+  transition({I_M1, I_E1S}, {PrbInv, PrbInvData}) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters; // must invalidate current data (only relevant for I_M1)
+    a1_allocateL1D; // but make sure there is room for incoming data when it arrives
+    pp_popProbeQueue;
+  }
+
+  transition({I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_ES}, {PrbInv, PrbInvData, PrbShrData}) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    a0_allocateL1D;
+    a1_allocateL1D;
+    pp_popProbeQueue;
+  }
+
+  transition({I_M0, I_E0S, I_M1, I_E1S}, PrbShrData) {} {
+    pb_sendProbeResponseBackprobe;
+    pp_popProbeQueue;
+  }
+
+  // Probes arriving in the ES_I / MO_I / MO_S* transient states. The MO_*
+  // states answer data probes with pdt_sendProbeResponseDataFromTBE.
+  transition(ES_I, PrbInvData, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInvData, I_C) {} {
+    pdt_sendProbeResponseDataFromTBE;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInv, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, PrbInv, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, PrbShrData, ES_I) {} {
+    ph_sendProbeResponseHit;
+    s_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbShrData, MO_I) {} {
+    pdt_sendProbeResponseDataFromTBE;
+    s_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S0, PrbInvData, S0_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdt_sendProbeResponseDataFromTBE;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S1, PrbInvData, S1_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdt_sendProbeResponseDataFromTBE;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S0, PrbInv, S0_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S1, PrbInv, S1_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition({MO_S0, MO_S1}, PrbShrData) {} {
+    pdt_sendProbeResponseDataFromTBE;
+    s_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  // Invalidating probes while an L2-to-L1 fill is pending: everything is
+  // invalidated, the needed entries are re-allocated, and the block is
+  // requested again.
+  transition({S_F0, Es_F0, E0_F, E1_Es}, {PrbInvData, PrbInv}, IF_E0S) {}{
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    // invalidate everything you've got
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    // but make sure you have room for what you need from the fill
+    a0_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F1, Es_F1, E1_F, E0_Es}, {PrbInvData, PrbInv}, IF_E1S) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    // invalidate everything you've got
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    // but make sure you have room for what you need from the fill
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F, Es_F}, {PrbInvData, PrbInv}, IF_ES) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    // invalidate everything you've got
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    // but make sure you have room for what you need from the fill
+    a0_allocateL1D;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition(Si_F0, {PrbInvData, PrbInv}, F_S0) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition(Si_F1, {PrbInvData, PrbInv}, F_S1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  // Downgrading probes while a fill is pending: respond with a hit; the
+  // E-flavoured fill states move to their S-flavoured counterparts.
+  transition({Es_F0, E0_F, E1_Es}, PrbShrData, S_F0) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({Es_F1, E1_F, E0_Es}, PrbShrData, S_F1) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition(Es_F, PrbShrData, S_F) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F0, S_F1, S_F, Si_F0, Si_F1}, PrbShrData) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  // Invalidating probes while an upgrade to M is pending.
+  transition(S_M0, PrbInvData, I_M0) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition(O_M0, PrbInvData, I_M0) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdm_sendProbeResponseDataMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  // Invalidating probes while an upgrade to M is pending (continued). Each
+  // forwards the eviction to both CPUs, sends a probe response, invalidates
+  // everything, re-allocates the entries the pending request needs, and pops
+  // the probe queue.
+  transition({S_M0, O_M0}, {PrbInv}, I_M0) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition(S_M1, PrbInvData, I_M1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  // Sends probe data (pdm_sendProbeResponseDataMs), so it accounts an L2
+  // data-array read just like the O_M0 counterpart.
+  transition(O_M1, PrbInvData, I_M1) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdm_sendProbeResponseDataMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S_M1, O_M1}, {PrbInv}, I_M1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  // Invalidating probes in the S0/S1 states and their *_C variants; the
+  // state is unchanged and the L1I / L2 entries are re-allocated.
+  transition({S0, S0_C}, {PrbInvData, PrbInv}) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S1, S1_C}, {PrbInvData, PrbInv}) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  // Downgrading probes in the upgrade-pending and S0/S1 states.
+  transition({S_M0, S_M1}, PrbShrData) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({O_M0, O_M1}, PrbShrData) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({S0, S1, S0_C, S1_C}, PrbShrData) {} {
+    pb_sendProbeResponseBackprobe;
+    pp_popProbeQueue;
+  }
+
+  // Invalidating probes while an L2-to-L1 fill is pending in an M/O-flavoured
+  // state: PrbInvData answers with data, PrbInv with a plain invalidate ack;
+  // both then invalidate, re-allocate, and re-issue the read.
+  transition({Ms_F0, M0_F, M1_Ms, O_F0}, PrbInvData, IF_E0S) { L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F1, M1_F, M0_Ms, O_F1}, PrbInvData, IF_E1S) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    ib_invBothClusters;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F, O_F}, PrbInvData, IF_ES) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F0, M0_F, M1_Ms, O_F0}, PrbInv, IF_E0S) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F1, M1_F, M0_Ms, O_F1}, PrbInv, IF_E1S) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F, O_F}, PrbInv, IF_ES) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  // Downgrading probes while a fill is pending in an M/O-flavoured state:
+  // supply the data and drop to the matching O_F* state.
+  transition({Ms_F0, M0_F, M1_Ms}, PrbShrData, O_F0) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  // Mirror of the O_F0 case above. Previously this body was empty, so the
+  // probe stayed at the head of the queue and was only answered when it was
+  // re-triggered in O_F1; respond and pop here instead.
+  transition({Ms_F1, M1_F, M0_Ms}, PrbShrData, O_F1) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F}, PrbShrData, O_F) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({O_F0, O_F1, O_F}, PrbShrData) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  // END TRANSITIONS
+}
+
+
diff --git a/src/mem/protocol/MOESI_AMD_Base-L3cache.sm b/src/mem/protocol/MOESI_AMD_Base-L3cache.sm
new file mode 100644
index 000000000..479cf4e78
--- /dev/null
+++
b/src/mem/protocol/MOESI_AMD_Base-L3cache.sm @@ -0,0 +1,1130 @@ +/* + * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * Author: Lisa Hsu + */ + +machine(MachineType:L3Cache, "L3") + : CacheMemory * L3cache; + WireBuffer * reqToDir; + WireBuffer * respToDir; + WireBuffer * l3UnblockToDir; + WireBuffer * reqToL3; + WireBuffer * probeToL3; + WireBuffer * respToL3; + Cycles l3_request_latency := 1; + Cycles l3_response_latency := 35; + + // To the general response network + MessageBuffer * responseFromL3, network="To", virtual_network="2", ordered="false", vnet_type="response"; + + // From the general response network + MessageBuffer * responseToL3, network="From", virtual_network="2", ordered="false", vnet_type="response"; + +{ + // EVENTS + enumeration(Event, desc="L3 Events") { + // Requests coming from the Cores + RdBlk, desc="CPU RdBlk event"; + RdBlkM, desc="CPU RdBlkM event"; + RdBlkS, desc="CPU RdBlkS event"; + CtoD, desc="Change to Dirty request"; + WrVicBlk, desc="L2 Victim (dirty)"; + WrVicBlkShared, desc="L2 Victim (dirty)"; + ClVicBlk, desc="L2 Victim (clean)"; + ClVicBlkShared, desc="L2 Victim (clean)"; + + CPUData, desc="WB data from CPU"; + CPUDataShared, desc="WB data from CPU, NBReqShared 1"; + StaleWB, desc="WB stale; no data"; + + L3_Repl, desc="L3 Replacement"; + + // Probes + PrbInvData, desc="Invalidating probe, return dirty data"; + PrbInv, desc="Invalidating probe, no need to return data"; + PrbShrData, desc="Downgrading probe, return data"; + + // Coming from Memory Controller + WBAck, desc="ack from memory"; + + CancelWB, desc="Cancel WB from L2"; + } + + // STATES + // Base States: + state_declaration(State, desc="L3 State", default="L3Cache_State_I") { + M, AccessPermission:Read_Write, desc="Modified"; // No other cache has copy, memory stale + O, AccessPermission:Read_Only, desc="Owned"; // Correct most recent copy, others may exist in S + E, AccessPermission:Read_Write, desc="Exclusive"; // Correct, most recent, and only copy (and == Memory) + S, AccessPermission:Read_Only, desc="Shared"; // Correct, most recent. 
If no one in O, then == Memory + I, AccessPermission:Invalid, desc="Invalid"; + + I_M, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data"; + I_O, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data"; + I_E, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data"; + I_S, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data"; + S_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to M"; + S_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O"; + S_E, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to E"; + S_S, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to S"; + E_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O"; + E_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O"; + E_E, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O"; + E_S, AccessPermission:Busy, desc="Shared, received WrVicBlk, sent Ack, waiting for Data"; + O_M, AccessPermission:Busy, desc="..."; + O_O, AccessPermission:Busy, desc="..."; + O_E, AccessPermission:Busy, desc="..."; + O_S, AccessPermission:Busy, desc="..."; + M_M, AccessPermission:Busy, desc="..."; + M_O, AccessPermission:Busy, desc="..."; + M_E, AccessPermission:Busy, desc="..."; + M_S, AccessPermission:Busy, desc="..."; + D_I, AccessPermission:Invalid, desc="drop WB data on the floor when receive"; + MOD_I, AccessPermission:Busy, desc="drop WB data on the floor, waiting for WBAck from Mem"; + MO_I, AccessPermission:Busy, desc="M or O, received L3_Repl, waiting for WBAck from Mem"; + I_I, AccessPermission:Busy, desc="I_MO received L3_Repl"; + I_CD, AccessPermission:Busy, desc="I_I received WBAck, now just waiting for 
CPUData"; + I_C, AccessPermission:Invalid, desc="sent cancel, just waiting to receive mem wb ack so nothing gets confused"; + } + + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { + DataArrayRead, desc="Read the data array"; + DataArrayWrite, desc="Write the data array"; + TagArrayRead, desc="Read the data array"; + TagArrayWrite, desc="Write the data array"; + } + + // STRUCTURES + + structure(Entry, desc="...", interface="AbstractCacheEntry") { + State CacheState, desc="cache state"; + bool Dirty, desc="Is the data dirty (diff from memory?)"; + DataBlock DataBlk, desc="Data for the block"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block"; + bool Dirty, desc="Is the data dirty?"; + bool Shared, desc="Victim hit by shared probe"; + MachineID From, desc="Waiting for writeback from..."; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + TBETable TBEs, template="<L3Cache_TBE>", constructor="m_number_of_TBEs"; + + void set_cache_entry(AbstractCacheEntry b); + void unset_cache_entry(); + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + + + // FUNCTION DEFINITIONS + Tick clockEdge(); + Tick cyclesToTicks(Cycles c); + + Entry getCacheEntry(Addr addr), return_by_pointer="yes" { + return static_cast(Entry, "pointer", L3cache.lookup(addr)); + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + return getCacheEntry(addr).DataBlk; + } + + bool presentOrAvail(Addr addr) { + return L3cache.isTagPresent(addr) || L3cache.cacheAvail(addr); + } + + State getState(TBE tbe, Entry cache_entry, Addr addr) { + if (is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:I; + } + + void setState(TBE tbe, Entry cache_entry, Addr addr, State state) 
{ + if (is_valid(tbe)) { + tbe.TBEState := state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + AccessPermission getAccessPermission(Addr addr) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + return L3Cache_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + return L3Cache_State_to_permission(cache_entry.CacheState); + } + + return AccessPermission:NotPresent; + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(L3Cache_State_to_permission(state)); + } + } + + void recordRequestType(RequestType request_type, Addr addr) { + + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + return true; + } + + + // OUT PORTS + out_port(requestNetwork_out, CPURequestMsg, reqToDir); + out_port(L3Resp_out, ResponseMsg, respToDir); + out_port(responseNetwork_out, ResponseMsg, responseFromL3); + out_port(unblockNetwork_out, UnblockMsg, l3UnblockToDir); + + // IN PORTS + in_port(NBResponse_in, ResponseMsg, respToL3) { + if (NBResponse_in.isReady(clockEdge())) { + peek(NBResponse_in, ResponseMsg) { + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + if (in_msg.Type == CoherenceResponseType:NBSysWBAck) { + trigger(Event:WBAck, in_msg.addr, cache_entry, tbe); + } else { + DPRINTF(RubySlicc, 
"%s\n", in_msg); + error("Error on NBResponse Type"); + } + } + } + } + + // Response Network + in_port(responseNetwork_in, ResponseMsg, responseToL3) { + if (responseNetwork_in.isReady(clockEdge())) { + peek(responseNetwork_in, ResponseMsg) { + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + if (in_msg.Type == CoherenceResponseType:CPUData) { + if (in_msg.NbReqShared) { + trigger(Event:CPUDataShared, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:CPUData, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceResponseType:StaleNotif) { + trigger(Event:StaleWB, in_msg.addr, cache_entry, tbe); + } else { + DPRINTF(RubySlicc, "%s\n", in_msg); + error("Error on NBResponse Type"); + } + } + } + } + + // probe network + in_port(probeNetwork_in, NBProbeRequestMsg, probeToL3) { + if (probeNetwork_in.isReady(clockEdge())) { + peek(probeNetwork_in, NBProbeRequestMsg) { + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + if (in_msg.Type == ProbeRequestType:PrbInv) { + if (in_msg.ReturnData) { + trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) { + if (in_msg.ReturnData) { + trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe); + } else { + error("Don't think I should get any of these"); + } + } + } + } + } + + // Request Network + in_port(requestNetwork_in, CPURequestMsg, reqToL3) { + if (requestNetwork_in.isReady(clockEdge())) { + peek(requestNetwork_in, CPURequestMsg) { + assert(in_msg.Destination.isElement(machineID)); + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + if (in_msg.Type == CoherenceRequestType:RdBlk) { + trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkS) { + trigger(Event:RdBlkS, in_msg.addr, cache_entry, 
tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { + trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:VicClean) { + if (presentOrAvail(in_msg.addr)) { + if (in_msg.Shared) { + trigger(Event:ClVicBlkShared, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:ClVicBlk, in_msg.addr, cache_entry, tbe); + } + } else { + Addr victim := L3cache.cacheProbe(in_msg.addr); + trigger(Event:L3_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } else if (in_msg.Type == CoherenceRequestType:VicDirty) { + if (presentOrAvail(in_msg.addr)) { + if (in_msg.Shared) { + trigger(Event:WrVicBlkShared, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe); + } + } else { + Addr victim := L3cache.cacheProbe(in_msg.addr); + trigger(Event:L3_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } else if (in_msg.Type == CoherenceRequestType:WrCancel) { + if (is_valid(tbe) && tbe.From == in_msg.Requestor) { + trigger(Event:CancelWB, in_msg.addr, cache_entry, tbe); + } else { + requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + } + } + } + } + + // BEGIN ACTIONS + + action(i_invL3, "i", desc="invalidate L3 cache block") { + if (is_valid(cache_entry)) { + L3cache.deallocate(address); + } + unset_cache_entry(); + } + + action(rm_sendResponseM, "rm", desc="send Modified response") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l3_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := cache_entry.Dirty; + out_msg.State := CoherenceState:Modified; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(rs_sendResponseS, "rs", desc="send Shared 
response") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l3_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := cache_entry.Dirty; + out_msg.State := CoherenceState:Shared; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + + action(r_requestToMem, "r", desc="Miss in L3, pass on") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(requestNetwork_out, CPURequestMsg, l3_request_latency) { + out_msg.addr := address; + out_msg.Type := in_msg.Type; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Shared := false; // unneeded for this request + out_msg.MessageSize := in_msg.MessageSize; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(t_allocateTBE, "t", desc="allocate TBE Entry") { + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + if (is_valid(cache_entry)) { + tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs + tbe.Dirty := cache_entry.Dirty; + } + tbe.From := machineID; + } + + action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(vd_vicDirty, "vd", desc="Victimize dirty L3 data") { + enqueue(requestNetwork_out, CPURequestMsg, l3_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:VicDirty; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + } + } + + action(w_sendResponseWBAck, "w", desc="send WB Ack") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l3_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysWBAck; 
+ out_msg.Destination.add(in_msg.Requestor); + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") { + enqueue(L3Resp_out, ResponseMsg, l3_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + out_msg.Dirty := false; + out_msg.Hit := false; + out_msg.Ntsl := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(ph_sendProbeResponseHit, "ph", desc="send probe ack, no data") { + enqueue(L3Resp_out, ResponseMsg, l3_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + out_msg.Dirty := false; + out_msg.Hit := true; + out_msg.Ntsl := false; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(pm_sendProbeResponseMiss, "pm", desc="send probe ack, no data") { + enqueue(L3Resp_out, ResponseMsg, l3_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? 
probably not for multisocket
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  // Probe acknowledgement carrying the block held in the L3 entry. The
+  // assert documents the precondition: this action may only run when the
+  // cache entry is dirty, so the response is always marked Dirty and Hit.
+  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+    enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.DataBlk := cache_entry.DataBlk;
+      assert(cache_entry.Dirty);
+      out_msg.Dirty := true;
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  // Same as pd_sendProbeResponseData, but the data (and the dirty
+  // precondition) come from the TBE instead of the cache entry.
+  action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") {
+    enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.DataBlk := tbe.DataBlk;
+      assert(tbe.Dirty);
+      out_msg.Dirty := true;
+      out_msg.Hit := true;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.State := CoherenceState:NA;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  // Sends a WrCancel request for this address to the directory that owns it.
+  action(mc_cancelMemWriteback, "mc", desc="send writeback cancel to memory") {
+    enqueue(requestNetwork_out, CPURequestMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:WrCancel;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+    }
+  }
+
+  // Allocates an L3 entry only if none is bound yet, so the action is safe
+  // to run when the block is already present.
+  action(a_allocateBlock, "a", desc="allocate L3 block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L3cache.allocate(address, new Entry));
+    }
+  }
+
+  // Copies the data of the response at the head of responseNetwork_in into
+  // the entry. Dirty is only ever set here, never cleared: a clean response
+  // leaves a previously dirty entry dirty.
+  action(d_writeData, "d", desc="write data to L3") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (in_msg.Dirty) {
+        cache_entry.Dirty := in_msg.Dirty;
+      }
+      cache_entry.DataBlk := in_msg.DataBlk;
+      DPRINTF(RubySlicc, "Writing to L3: %s\n", in_msg);
+    }
+  }
+
+  // Copies data from the request at the head of requestNetwork_in and marks
+  // the entry dirty unconditionally.
+  // NOTE(review): every use of this action in the transitions visible in
+  // this file is commented out; the desc string duplicates d_writeData's.
+  action(rd_copyDataFromRequest, "rd", desc="write data to L3") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := true;
+    }
+  }
+
+  // Records the requestor of the pending victim request in the TBE.
+  action(f_setFrom, "f", desc="set who WB is expected to come from") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      tbe.From := in_msg.Requestor;
+    }
+  }
+
+  // Resets the expected writeback source to this controller itself.
+  action(rf_resetFrom, "rf", desc="reset From") {
+    tbe.From := machineID;
+  }
+
+  // Writes the TBE's copy of the block back to the directory. NbReqShared
+  // mirrors tbe.Shared; State is a placeholder value ("faux info").
+  action(wb_data, "wb", desc="write back data") {
+    enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUData;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.Dirty := tbe.Dirty;
+      if (tbe.Shared) {
+        out_msg.NbReqShared := true;
+      } else {
+        out_msg.NbReqShared := false;
+      }
+      out_msg.State := CoherenceState:Shared; // faux info
+      out_msg.MessageSize := MessageSizeType:Writeback_Data;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  // Captures data and dirty bit of the incoming response in the TBE
+  // (overwrites tbe.Dirty, unlike d_writeData which only sets it).
+  action(wt_writeDataToTBE, "wt", desc="write WB data to TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      tbe.DataBlk := in_msg.DataBlk;
+      tbe.Dirty := in_msg.Dirty;
+    }
+  }
+
+  // Sends an unblock message for this address to its directory.
+  action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+    enqueue(unblockNetwork_out, UnblockMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(ut_updateTag, "ut", desc="update Tag (i.e. 
set MRU)") {
+    L3cache.setMRU(address);
+  }
+
+  // Queue management: each pop action dequeues the head of one in_port.
+  action(p_popRequestQueue, "p", desc="pop request queue") {
+    requestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="pop response queue") {
+    responseNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pn_popNBResponseQueue, "pn", desc="pop NB response queue") {
+    NBResponse_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  // Re-queues the head request after recycle_latency instead of consuming it.
+  action(zz_recycleRequestQueue, "\z", desc="recycle request queue") {
+    requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+
+  // END ACTIONS
+
+  // BEGIN TRANSITIONS
+
+  // transitions from base
+
+  // Read requests. Misses (I, I_C) are forwarded; hits answer from the L3
+  // and refresh the replacement state via ut_updateTag.
+  transition({I, I_C}, {RdBlk, RdBlkS, RdBlkM, CtoD}) {TagArrayRead} {
+    r_requestToMem;
+    p_popRequestQueue;
+  }
+
+  transition(O, RdBlk ) {TagArrayRead, DataArrayRead} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+  transition(M, RdBlk, O) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+
+  transition(S, RdBlk) {TagArrayRead, DataArrayRead} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+  transition(E, RdBlk, S) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+
+  transition({M, O}, RdBlkS, O) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+
+  transition({E, S}, RdBlkS, S) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+
+  transition(M, RdBlkM, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    rm_sendResponseM;
+    i_invL3;
+    p_popRequestQueue;
+  }
+
+  transition({O, S}, {RdBlkM, CtoD}) {TagArrayRead} {
+    r_requestToMem; // can't handle this, just forward
+    p_popRequestQueue;
+  }
+
+  transition(E, RdBlkM, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    rm_sendResponseM;
+    i_invL3;
+    p_popRequestQueue;
+  }
+
+  // Dirty victim requests (WrVicBlk / WrVicBlkShared). The target state is
+  // named <current>_<future>; every transition records the requestor with
+  // f_setFrom before acknowledging. From I an L3 block is allocated first.
+  transition({I}, WrVicBlk, I_M) {TagArrayRead, TagArrayWrite} {
+    a_allocateBlock;
+    t_allocateTBE;
+    f_setFrom;
+//    rd_copyDataFromRequest;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(I_C, {WrVicBlk, WrVicBlkShared, ClVicBlk, ClVicBlkShared}) {} {
+    zz_recycleRequestQueue;
+  }
+
+  transition({I}, WrVicBlkShared, I_O) {TagArrayRead, TagArrayWrite} {
+    a_allocateBlock;
+    t_allocateTBE;
+    f_setFrom;
+//    rd_copyDataFromRequest;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(S, WrVicBlkShared, S_O) {TagArrayRead, TagArrayWrite} {
+//    rd_copyDataFromRequest;
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(S, WrVicBlk, S_M) {TagArrayRead, TagArrayWrite} { // should be technically not possible, but assume the data comes back with shared bit flipped
+//    rd_copyDataFromRequest;
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(E, WrVicBlk, E_M) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(E, WrVicBlkShared, E_O) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(O, WrVicBlk, O_M) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(O, WrVicBlkShared, O_O) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(M, WrVicBlk, M_M) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(M, WrVicBlkShared, M_O) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  // Clean victim requests (ClVicBlk / ClVicBlkShared); same shape as the
+  // dirty-victim transitions above.
+  transition({I}, ClVicBlk, I_E) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    a_allocateBlock;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition({I}, ClVicBlkShared, I_S) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    a_allocateBlock;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(S, ClVicBlk, S_E) {TagArrayRead, TagArrayWrite} { // technically impossible, assume data comes back with shared bit flipped
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(S, ClVicBlkShared, S_S) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(E, ClVicBlk, E_E) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(E, ClVicBlkShared, E_S) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(O, ClVicBlk, O_E) {TagArrayRead, TagArrayWrite} { // technically impossible, but assume data comes back with shared bit flipped
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(O, ClVicBlkShared, O_S) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(M, ClVicBlk, M_E) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(M, ClVicBlkShared, M_S) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  // Requests that arrive while the L3's own writeback (MO_I) is pending.
+  transition({MO_I}, {RdBlk, RdBlkS, RdBlkM, CtoD}) {} {
+    r_requestToMem;
+    p_popRequestQueue;
+  }
+
+  transition(MO_I, {WrVicBlkShared, WrVicBlk, ClVicBlk, ClVicBlkShared}, MOD_I) {TagArrayWrite} {
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  // Victim data arrives (CPUData / CPUDataShared): unblock the directory,
+  // free the TBE and store the data in the L3 entry.
+  // NOTE(review): within this transition table there is no CPUData or
+  // CPUDataShared transition for the E_*, O_M, O_O and M_* transient states,
+  // nor a plain CPUData transition for S_M, S_E and O_E -- confirm whether
+  // those combinations are unreachable by construction.
+  transition(I_M, CPUData, M) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M, CPUDataShared, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_O, {CPUData, CPUDataShared}, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E, CPUData, E) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E, CPUDataShared, S) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_S, {CPUData, CPUDataShared}, S) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(S_M, CPUDataShared, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag; // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(S_O, {CPUData, CPUDataShared}, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag; // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(S_E, CPUDataShared, S) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag; // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(S_S, {CPUData, CPUDataShared}, S) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag; // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(O_E, CPUDataShared, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag; // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(O_S, {CPUData, CPUDataShared}, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag; // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition({D_I}, {CPUData, CPUDataShared}, I) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(MOD_I, {CPUData, CPUDataShared}, MO_I) {TagArrayWrite} {
+    uu_sendUnblock;
+    rf_resetFrom;
+    pr_popResponseQueue;
+  }
+
+  transition(I_I, {CPUData, CPUDataShared}, MO_I) {TagArrayWrite, DataArrayRead} {
+    uu_sendUnblock;
+    wt_writeDataToTBE;
+    rf_resetFrom;
+    pr_popResponseQueue;
+  }
+
+  transition(I_CD, {CPUData, CPUDataShared}, I) {DataArrayRead, TagArrayWrite} {
+    uu_sendUnblock;
+    wt_writeDataToTBE;
+    wb_data;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  // L3 replacements. These transitions pop nothing; while a victim
+  // writeback is in flight the request that caused the replacement is
+  // recycled instead.
+  transition({M, O}, L3_Repl, MO_I) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    vd_vicDirty;
+    i_invL3;
+  }
+
+  transition({E, S,}, L3_Repl, I) {TagArrayRead, TagArrayWrite} {
+    i_invL3;
+  }
+
+  transition({I_M, I_O, S_M, S_O, E_M, E_O}, L3_Repl) {} {
+    zz_recycleRequestQueue;
+  }
+
+  transition({O_M, O_O, O_E, O_S, M_M, M_O, M_E, M_S}, L3_Repl) {} {
+    zz_recycleRequestQueue;
+  }
+
+  transition({I_E, I_S, S_E, S_S, E_E, E_S}, L3_Repl) {} {
+    zz_recycleRequestQueue;
+  }
+
+  // Probes against stable states.
+  transition({M, O}, PrbInvData, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    pd_sendProbeResponseData;
+    i_invL3;
+    pp_popProbeQueue;
+  }
+
+  transition({E, S, I}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    i_invL3; // nothing will happen in I
+    pp_popProbeQueue;
+  }
+
+  transition({M, O, E, S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    i_invL3; // nothing will happen in I
+    pp_popProbeQueue;
+  }
+
+  transition({M, O}, PrbShrData, O) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({E, S}, PrbShrData, S) {TagArrayRead, TagArrayWrite} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition(I, PrbShrData) {TagArrayRead} {
+    pm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  // Probes while a writeback is pending: data is served from the TBE, and
+  // invalidating probes also cancel the outstanding memory writeback.
+  transition(MO_I, PrbInvData, I_C) {TagArrayWrite, DataArrayRead} {
+    pdt_sendProbeResponseDataFromTBE;
+    mc_cancelMemWriteback;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInv, I_C) {TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    mc_cancelMemWriteback;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbShrData) {DataArrayRead} {
+    pdt_sendProbeResponseDataFromTBE;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, {PrbInvData, PrbInv}) {} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbShrData) {} {
+    pm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  // Writeback acknowledgements (WBAck), popped from the NB response queue.
+  transition(I_I, {WBAck}, I_CD) {TagArrayWrite} {
+    pn_popNBResponseQueue;
+  }
+
+  transition(MOD_I, WBAck, D_I) {DataArrayRead} {
+    wb_data;
+    pn_popNBResponseQueue;
+  }
+
+  transition(MO_I, WBAck, I) {DataArrayRead, TagArrayWrite} {
+    wb_data;
+    dt_deallocateTBE;
+    pn_popNBResponseQueue;
+  }
+
+  transition(I_C, {WBAck}, I) {TagArrayWrite} {
+    dt_deallocateTBE;
+    pn_popNBResponseQueue;
+  }
+
+  // Cancelled victim writebacks (CancelWB): a <cur>_<next> state falls back
+  // to <cur>; from the I_* states the block allocated for the victim is
+  // invalidated again.
+  transition({I_M, I_O, I_E, I_S}, CancelWB, I) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    i_invL3;
+    p_popRequestQueue;
+  }
+
+  transition({S_S, S_O, S_M, S_E}, CancelWB, S) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition({E_M, E_O, E_E, E_S}, CancelWB, E) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition({O_M, O_O, O_E, O_S}, CancelWB, O) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition({M_M, M_O, M_E, M_S}, CancelWB, M) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition(D_I, CancelWB, I) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition(MOD_I, CancelWB, MO_I) {TagArrayWrite} {
+    uu_sendUnblock;
+    rf_resetFrom;
+    p_popRequestQueue;
+  }
+
+  transition(I_I, CancelWB, I_C) {TagArrayWrite} {
+    uu_sendUnblock;
+    rf_resetFrom;
+    mc_cancelMemWriteback;
+    p_popRequestQueue;
+  }
+
+  transition(I_CD, CancelWB, I) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    mc_cancelMemWriteback;
+    p_popRequestQueue;
+  }
+
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm b/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm
new file mode 100644
index 000000000..fd84447a2
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm
@@ -0,0 +1,3009 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+// CorePair controller: one L2 shared by two clusters (0 and 1), each with
+// its own L1 data cache (L1D0cache / L1D1cache), plus a shared L1Icache.
+// regionBufferNum selects the RegionBuffer machine returned by getPeer().
+machine(MachineType:CorePair, "CP-like Core Coherence")
+  : Sequencer * sequencer;
+    Sequencer * sequencer1;
+    CacheMemory * L1Icache;
+    CacheMemory * L1D0cache;
+    CacheMemory * L1D1cache;
+    CacheMemory * L2cache;
+    int regionBufferNum;
+    bool send_evictions := "False";
+    Cycles issue_latency := 5;
+    Cycles l2_hit_latency := 18;
+
+    // BEGIN Core Buffers
+
+    // To the Network
+    MessageBuffer * requestFromCore, network="To", virtual_network="0", ordered="true", vnet_type="request";
+    MessageBuffer * responseFromCore, network="To", virtual_network="2", ordered="false", vnet_type="response";
+    MessageBuffer * unblockFromCore, network="To", virtual_network="4", ordered="false", vnet_type="unblock";
+
+    // From the Network
+    MessageBuffer * probeToCore, network="From", virtual_network="0", ordered="false", vnet_type="request";
+    MessageBuffer * responseToCore, network="From", virtual_network="2", ordered="false", vnet_type="response";
+
+    MessageBuffer * mandatoryQueue, ordered="false";
+    MessageBuffer * triggerQueue, ordered="true";
+
+    // END Core Buffers
+
+{
+  // BEGIN STATES
+  // Naming used below: a trailing 0/1 refers to cluster 0/1, a trailing
+  // "s" (Es, Ms) to the line being held at core level rather than by a
+  // single cluster, as the individual descriptions spell out.
+  state_declaration(State, desc="Cache states", default="CorePair_State_I") {
+
+    I, AccessPermission:Invalid, desc="Invalid";
+    S, AccessPermission:Read_Only, desc="Shared";
+    E0, AccessPermission:Read_Write, desc="Exclusive with Cluster 0 ownership";
+    E1, AccessPermission:Read_Write, desc="Exclusive with Cluster 1 ownership";
+    Es, AccessPermission:Read_Write, desc="Exclusive in core";
+    O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
+    Ms, AccessPermission:Read_Write, desc="Modified in core, both clusters may be sharing line";
+    M0, AccessPermission:Read_Write, desc="Modified with cluster ownership";
+    M1, AccessPermission:Read_Write, desc="Modified with cluster ownership";
+
+    // Transient States
+    I_M0, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+    I_M1, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+    I_M0M1, AccessPermission:Busy, desc="Was in I_M0, got a store request from other cluster as well";
+    I_M1M0, AccessPermission:Busy, desc="Was in I_M1, got a store request from other cluster as well";
+    I_M0Ms, AccessPermission:Busy, desc="Was in I_M0, got a load request from other cluster as well";
+    I_M1Ms, AccessPermission:Busy, desc="Was in I_M1, got a load request from other cluster as well";
+    I_E0S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+    I_E1S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+    I_ES, AccessPermission:Busy, desc="S_F got hit by invalidating probe, RdBlk response needs to go to both clusters";
+
+    IF_E0S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E0S but expecting a L2_to_L1D0 trigger, just drop when receive";
+    IF_E1S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E1S but expecting a L2_to_L1D1 trigger, just drop when receive";
+    IF_ES, AccessPermission:Busy, desc="same, but waiting for two fills";
+    IF0_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one";
+    IF1_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one";
+    F_S0, AccessPermission:Busy, desc="same, but going to S0 when trigger received";
+    F_S1, AccessPermission:Busy, desc="same, but going to S1 when trigger received";
+
+    ES_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for clean writeback ack";
+    MO_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for dirty writeback ack";
+    MO_S0, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS";
+    MO_S1, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS";
+    S_F0, AccessPermission:Read_Only, desc="Shared, filling L1";
+    S_F1, AccessPermission:Read_Only, desc="Shared, filling L1";
+    S_F, AccessPermission:Read_Only, desc="Shared, filling L1";
+    O_F0, AccessPermission:Read_Only, desc="Owned, filling L1";
+    O_F1, AccessPermission:Read_Only, desc="Owned, filling L1";
+    O_F, AccessPermission:Read_Only, desc="Owned, filling L1";
+    Si_F0, AccessPermission:Read_Only, desc="Shared, filling icache";
+    Si_F1, AccessPermission:Read_Only, desc="Shared, filling icache";
+    S_M0, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    S_M1, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    // NOTE(review): O_M0/O_M1 reuse the "Shared, ..." description of
+    // S_M0/S_M1; by analogy with O_F0 ("Owned, ...") this looks like a
+    // copy-paste and presumably should read "Owned" -- confirm.
+    O_M0, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    O_M1, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    S0, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 0, waiting for response";
+    S1, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 1, waiting for response";
+
+    // NOTE(review): the states below described only as "..." are not
+    // documented in this file.
+    Es_F0, AccessPermission:Read_Write, desc="Es, Cluster read, filling";
+    Es_F1, AccessPermission:Read_Write, desc="Es, Cluster read, filling";
+    Es_F, AccessPermission:Read_Write, desc="Es, other cluster read, filling";
+    E0_F, AccessPermission:Read_Write, desc="E0, cluster read, filling";
+    E1_F, AccessPermission:Read_Write, desc="...";
+    E0_Es, AccessPermission:Read_Write, desc="...";
+    E1_Es, AccessPermission:Read_Write, desc="...";
+    Ms_F0, AccessPermission:Read_Write, desc="...";
+    Ms_F1, AccessPermission:Read_Write, desc="...";
+    Ms_F, AccessPermission:Read_Write, desc="...";
+    M0_F, AccessPermission:Read_Write, desc="...";
+    M0_Ms, AccessPermission:Read_Write, desc="...";
+    M1_F, AccessPermission:Read_Write, desc="...";
+    M1_Ms, AccessPermission:Read_Write, desc="...";
+
+    // NOTE(review): "form NB" in the S0_C/S1_C descriptions is a typo for
+    // "from NB"; left untouched here because desc strings are program text.
+    I_C, AccessPermission:Invalid, desc="Invalid, but waiting for WBAck from NB from canceled writeback";
+    S0_C, AccessPermission:Busy, desc="MO_S0 hit by invalidating probe, waiting for WBAck form NB for canceled WB";
+    S1_C, AccessPermission:Busy, desc="MO_S1 hit by invalidating probe, waiting for WBAck form NB for canceled WB";
+    S_C, AccessPermission:Busy, desc="S*_C got NB_AckS, still waiting for WBAck";
+
+  } // END STATES
+
+  // BEGIN EVENTS
+  enumeration(Event, desc="CP Events") {
+    // CP Initiated events
+    C0_Load_L1miss, desc="Cluster 0 load, L1 missed";
+    C0_Load_L1hit, desc="Cluster 0 load, L1 hit";
+    C1_Load_L1miss, desc="Cluster 1 load L1 missed";
+    C1_Load_L1hit, desc="Cluster 1 load L1 hit";
+    Ifetch0_L1hit, desc="Instruction fetch, hit in the L1";
+    Ifetch1_L1hit, desc="Instruction fetch, hit in the L1";
+    Ifetch0_L1miss, desc="Instruction fetch, missed in the L1";
+    Ifetch1_L1miss, desc="Instruction fetch, missed in the L1";
+    C0_Store_L1miss, desc="Cluster 0 store missed in L1";
+    C0_Store_L1hit, desc="Cluster 0 store hit in L1";
+    C1_Store_L1miss, desc="Cluster 1 store missed in L1";
+    C1_Store_L1hit, desc="Cluster 1 store hit in L1";
+    // NB Initiated events
+    NB_AckS, desc="NB Ack to Core Request";
+    NB_AckM, desc="NB Ack to Core Request";
+    NB_AckE, desc="NB Ack to Core Request";
+
+    NB_AckWB, desc="NB Ack for writeback";
+
+    // Memory System initiated events
+    L1I_Repl, desc="Replace address from L1I"; // Presumed clean
+    L1D0_Repl, desc="Replace address from L1D0"; // Presumed clean
+    L1D1_Repl, desc="Replace address from L1D1"; // Presumed clean
+    L2_Repl, desc="Replace address from L2";
+
+    L2_to_L1D0, desc="L1 fill from L2";
+    L2_to_L1D1, desc="L1 fill 
from L2";
+    L2_to_L1I, desc="L1 fill from L2";
+
+    // Probe Events
+    PrbInvData, desc="probe, return O or M data";
+    PrbInvDataDemand, desc="probe, return O or M data. Demand request";
+    PrbInv, desc="probe, no need for data";
+    PrbShrData, desc="probe downgrade, return O or M data";
+    PrbShrDataDemand, desc="probe downgrade, return O or M data. Demand request";
+    ForceRepl, desc="probe from r-buf. Act as though a repl";
+    // NOTE(review): ForceDowngrade carries the same description as
+    // ForceRepl; presumably a copy-paste -- confirm the intended wording.
+    ForceDowngrade, desc="probe from r-buf. Act as though a repl";
+
+  } // END EVENTS
+
+  // Per-cache, per-array access kinds. recordRequestType() and
+  // checkResourceAvailable() map each member onto the matching CacheMemory
+  // (L1D0cache, L1D1cache, L1Icache or L2cache) and onto its data or tag
+  // array, so the descriptions distinguish the two arrays.
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    L1D0DataArrayRead, desc="Read the data array";
+    L1D0DataArrayWrite, desc="Write the data array";
+    L1D0TagArrayRead, desc="Read the tag array";
+    L1D0TagArrayWrite, desc="Write the tag array";
+    L1D1DataArrayRead, desc="Read the data array";
+    L1D1DataArrayWrite, desc="Write the data array";
+    L1D1TagArrayRead, desc="Read the tag array";
+    L1D1TagArrayWrite, desc="Write the tag array";
+    L1IDataArrayRead, desc="Read the data array";
+    L1IDataArrayWrite, desc="Write the data array";
+    L1ITagArrayRead, desc="Read the tag array";
+    L1ITagArrayWrite, desc="Write the tag array";
+    L2DataArrayRead, desc="Read the data array";
+    L2DataArrayWrite, desc="Write the data array";
+    L2TagArrayRead, desc="Read the tag array";
+    L2TagArrayWrite, desc="Write the tag array";
+  }
+
+
+  // BEGIN STRUCTURE DEFINITIONS
+
+
+  // Cache Entry
+  // FromL2 is consumed by testAndClearLocalHit(), which reports
+  // MachineType:L2Cache once and then clears the flag.
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState, desc="cache state";
+    bool Dirty, desc="Is the data dirty (diff than memory)?";
+    DataBlock DataBlk, desc="data for the block";
+    bool FromL2, default="false", desc="block just moved from L2";
+  }
+
+  // Transaction buffer entry: per-address state kept while a request,
+  // writeback or probe is in flight.
+  structure(TBE, desc="...") {
+    State TBEState, desc="Transient state";
+    DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
+    bool Dirty, desc="Is the data dirty (different than memory)?";
+    int NumPendingMsgs, desc="Number of acks/data 
messages that this processor is waiting for";
+    bool Shared, desc="Victim hit by shared probe";
+    bool AckNeeded, desc="True if need to ack r-dir";
+  }
+
+  // Externally implemented table of TBEs, indexed by address.
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<CorePair_TBE>", constructor="m_number_of_TBEs";
+
+  // Prototypes of functions that are declared here but defined outside
+  // this file.
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  // END STRUCTURE DEFINITIONS
+
+  // BEGIN INTERNAL FUNCTIONS
+
+  // Returns the RegionBuffer machine selected by regionBufferNum. The
+  // mach argument is not used.
+  MachineID getPeer(MachineID mach) {
+    return createMachineID(MachineType:RegionBuffer, intToID(regionBufferNum));
+  }
+
+  // True if the address has a tag in the L2 or in any of the three L1s.
+  bool addressInCore(Addr addr) {
+    return (L2cache.isTagPresent(addr) || L1Icache.isTagPresent(addr) || L1D0cache.isTagPresent(addr) || L1D1cache.isTagPresent(addr));
+  }
+
+  // Looks the address up in the L2 only; the L1s are reached through
+  // getL1CacheEntry() and getICacheEntry().
+  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+    Entry L2cache_entry := static_cast(Entry, "pointer", L2cache.lookup(address));
+    return L2cache_entry;
+  }
+
+  // Prefers the TBE's copy of the block when a TBE exists for the address,
+  // otherwise returns the L2 entry's data.
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return tbe.DataBlk;
+    } else {
+      return getCacheEntry(addr).DataBlk;
+    }
+  }
+
+  // cluster == 0 selects L1D0cache; any other value selects L1D1cache.
+  Entry getL1CacheEntry(Addr addr, int cluster), return_by_pointer="yes" {
+    if (cluster == 0) {
+      Entry L1D0_entry := static_cast(Entry, "pointer", L1D0cache.lookup(addr));
+      return L1D0_entry;
+    } else {
+      Entry L1D1_entry := static_cast(Entry, "pointer", L1D1cache.lookup(addr));
+      return L1D1_entry;
+    }
+  }
+
+  Entry getICacheEntry(Addr addr), return_by_pointer="yes" {
+    Entry c_entry := static_cast(Entry, "pointer", L1Icache.lookup(addr));
+    return c_entry;
+  }
+
+  // presentOrAvail*: true if the cache already holds the tag or reports
+  // cacheAvail() for the address.
+  bool presentOrAvail2(Addr addr) {
+    return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+  }
+
+  bool presentOrAvailI(Addr addr) {
+    return L1Icache.isTagPresent(addr) || L1Icache.cacheAvail(addr);
+  }
+
+  bool presentOrAvailD0(Addr addr) {
+    return L1D0cache.isTagPresent(addr) || L1D0cache.cacheAvail(addr);
+  }
+
+  bool presentOrAvailD1(Addr addr) {
+    return L1D1cache.isTagPresent(addr) || L1D1cache.cacheAvail(addr);
+  }
+
+  // State lookup order: TBE first, then cache entry, else I.
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if(is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  // Writes the new state to both the TBE and the cache entry, whichever
+  // of them are valid.
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+      cache_entry.CacheState := state;
+    }
+  }
+
+  // Permission of the TBE state if a TBE exists, else of the L2 entry's
+  // state, else NotPresent.
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return CorePair_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return CorePair_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  // Functional read: served from the TBE when one exists, otherwise
+  // delegated to functionalMemoryRead().
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  // Functional write: updates the TBE copy (if any) and always calls
+  // functionalMemoryWrite(); returns the sum of both results.
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  // An address counts as valid unless its permission is NotPresent,
+  // Invalid or Busy.
+  bool isValid(Addr addr) {
+    AccessPermission perm := getAccessPermission(addr);
+    if (perm == AccessPermission:NotPresent ||
+        perm == AccessPermission:Invalid ||
+        perm == AccessPermission:Busy) {
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      
cache_entry.changePermission(CorePair_State_to_permission(state));
+    }
+  }
+
+  // Reports where a hit was served from: L2Cache if the entry's FromL2
+  // flag is set (the flag is cleared as a side effect), L1Cache otherwise.
+  MachineType testAndClearLocalHit(Entry cache_entry) {
+    assert(is_valid(cache_entry));
+    if (cache_entry.FromL2) {
+      cache_entry.FromL2 := false;
+      return MachineType:L2Cache;
+    } else {
+      return MachineType:L1Cache;
+    }
+  }
+
+  // Forwards a RequestType to the cache it names (L1D0, L1D1, L1I or L2)
+  // as the corresponding CacheRequestType. Values not listed are ignored.
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L1D0DataArrayRead) {
+      L1D0cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L1D0DataArrayWrite) {
+      L1D0cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L1D0TagArrayRead) {
+      L1D0cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L1D0TagArrayWrite) {
+      L1D0cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:L1D1DataArrayRead) {
+      L1D1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L1D1DataArrayWrite) {
+      L1D1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L1D1TagArrayRead) {
+      L1D1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L1D1TagArrayWrite) {
+      L1D1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:L1IDataArrayRead) {
+      L1Icache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L1IDataArrayWrite) {
+      L1Icache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L1ITagArrayRead) {
+      L1Icache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L1ITagArrayWrite) {
+      L1Icache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:L2DataArrayRead) {
+      L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L2DataArrayWrite) {
+      L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L2TagArrayRead) {
+      L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L2TagArrayWrite) {
+      L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  // Asks the named cache whether its data or tag array is available for
+  // the address. Reads and writes of the same array map to the same
+  // resource; unknown request types are reported as available.
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L2DataArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L2DataArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L2TagArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L2TagArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1D0DataArrayRead) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D0DataArrayWrite) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D0TagArrayRead) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1D0TagArrayWrite) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1D1DataArrayRead) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D1DataArrayWrite) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D1TagArrayRead) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1D1TagArrayWrite) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1IDataArrayRead) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1IDataArrayWrite) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1ITagArrayRead) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1ITagArrayWrite) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      return true;
+    }
+  }
+
+  // END INTERNAL FUNCTIONS
+
+  // ** OUT_PORTS **
+
+  out_port(requestNetwork_out, CPURequestMsg, requestFromCore);
+  out_port(responseNetwork_out, ResponseMsg, responseFromCore);
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+  out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
+
+  // ** IN_PORTS **
+
+  // Internal trigger queue: an L2_to_L1 trigger becomes one of the three
+  // fill events depending on the destination cache.
+  // NOTE(review): a trigger whose Type is not L2_to_L1 triggers no event
+  // and raises no error here -- confirm no other type is ever enqueued.
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue, block_on="addr") {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == TriggerType:L2_to_L1) {
+          if (in_msg.Dest == CacheId:L1I) {
+            trigger(Event:L2_to_L1I, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Dest == CacheId:L1D0) {
+            trigger(Event:L2_to_L1D0, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Dest == CacheId:L1D1) {
+            trigger(Event:L2_to_L1D1, in_msg.addr, cache_entry, tbe);
+          } else {
+            error("unexpected trigger dest");
+          }
+        }
+      }
+    }
+  }
+
+
+  in_port(probeNetwork_in, NBProbeRequestMsg, probeToCore) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, NBProbeRequestMsg, block_on="addr") {
+        Entry cache_entry := 
getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + + if (in_msg.Type == ProbeRequestType:PrbInv) { + if (in_msg.DemandRequest) { + trigger(Event:PrbInvDataDemand, in_msg.addr, cache_entry, tbe); + } else if (in_msg.ReturnData) { + trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) { + if (in_msg.DemandRequest) { + trigger(Event:PrbShrDataDemand, in_msg.addr, cache_entry, tbe); + } else { + assert(in_msg.ReturnData); + trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == ProbeRequestType:PrbRepl) { + trigger(Event:ForceRepl, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == ProbeRequestType:PrbRegDowngrade) { + trigger(Event:ForceDowngrade, in_msg.addr, cache_entry, tbe); + } else { + error("Unknown probe request"); + } + } + } + } + + + // ResponseNetwork + in_port(responseToCore_in, ResponseMsg, responseToCore) { + if (responseToCore_in.isReady(clockEdge())) { + peek(responseToCore_in, ResponseMsg, block_on="addr") { + + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs.lookup(in_msg.addr); + + if (in_msg.Type == CoherenceResponseType:NBSysResp) { + if (in_msg.State == CoherenceState:Modified) { + trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe); + } else if (in_msg.State == CoherenceState:Shared) { + trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe); + } else if (in_msg.State == CoherenceState:Exclusive) { + trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) { + trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe); + } else { + error("Unexpected Response Message to Core"); + } + } + } + } + + // Nothing from the Unblock Network + + // Mandatory Queue + in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") { + if (mandatoryQueue_in.isReady(clockEdge())) { 
+ peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { + + Entry cache_entry := getCacheEntry(in_msg.LineAddress); + TBE tbe := TBEs.lookup(in_msg.LineAddress); + + if (in_msg.Type == RubyRequestType:IFETCH) { + // FETCH ACCESS + + if (L1Icache.isTagPresent(in_msg.LineAddress)) { + if (mod(in_msg.contextId, 2) == 0) { + trigger(Event:Ifetch0_L1hit, in_msg.LineAddress, cache_entry, tbe); + } else { + trigger(Event:Ifetch1_L1hit, in_msg.LineAddress, cache_entry, tbe); + } + } else { + if (presentOrAvail2(in_msg.LineAddress)) { + if (presentOrAvailI(in_msg.LineAddress)) { + if (mod(in_msg.contextId, 2) == 0) { + trigger(Event:Ifetch0_L1miss, in_msg.LineAddress, cache_entry, + tbe); + } else { + trigger(Event:Ifetch1_L1miss, in_msg.LineAddress, cache_entry, + tbe); + } + } else { + Addr victim := L1Icache.cacheProbe(in_msg.LineAddress); + trigger(Event:L1I_Repl, victim, + getCacheEntry(victim), TBEs.lookup(victim)); + } + } else { // Not present or avail in L2 + Addr victim := L2cache.cacheProbe(in_msg.LineAddress); + DPRINTF(RubySlicc, "Victim for %s L2_Repl(0) is %s\n", in_msg.LineAddress, victim); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), + TBEs.lookup(victim)); + } + } + } else { + // DATA ACCESS + if (mod(in_msg.contextId, 2) == 1) { + if (L1D1cache.isTagPresent(in_msg.LineAddress)) { + if (in_msg.Type == RubyRequestType:LD) { + trigger(Event:C1_Load_L1hit, in_msg.LineAddress, cache_entry, + tbe); + } else { + // Stores must write through, make sure L2 avail. 
+ if (presentOrAvail2(in_msg.LineAddress)) { + trigger(Event:C1_Store_L1hit, in_msg.LineAddress, cache_entry, + tbe); + } else { + Addr victim := L2cache.cacheProbe(in_msg.LineAddress); + DPRINTF(RubySlicc, "Victim for %s L2_Repl(1) is %s\n", in_msg.LineAddress, victim); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), + TBEs.lookup(victim)); + } + } + } else { + if (presentOrAvail2(in_msg.LineAddress)) { + if (presentOrAvailD1(in_msg.LineAddress)) { + if (in_msg.Type == RubyRequestType:LD) { + trigger(Event:C1_Load_L1miss, in_msg.LineAddress, + cache_entry, tbe); + } else { + trigger(Event:C1_Store_L1miss, in_msg.LineAddress, + cache_entry, tbe); + } + } else { + Addr victim := L1D1cache.cacheProbe(in_msg.LineAddress); + DPRINTF(RubySlicc, "Victim for %s L1D1_Repl is %s\n", in_msg.LineAddress, victim); + trigger(Event:L1D1_Repl, victim, + getCacheEntry(victim), TBEs.lookup(victim)); + } + } else { // not present or avail in L2 + Addr victim := L2cache.cacheProbe(in_msg.LineAddress); + DPRINTF(RubySlicc, "Victim for %s L2_Repl(2) is %s\n", in_msg.LineAddress, victim); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } + } + } else { + Entry L1D0cache_entry := getL1CacheEntry(in_msg.LineAddress, 0); + if (is_valid(L1D0cache_entry)) { + if (in_msg.Type == RubyRequestType:LD) { + trigger(Event:C0_Load_L1hit, in_msg.LineAddress, cache_entry, + tbe); + } else { + if (presentOrAvail2(in_msg.LineAddress)) { + trigger(Event:C0_Store_L1hit, in_msg.LineAddress, cache_entry, + tbe); + } else { + Addr victim := L2cache.cacheProbe(in_msg.LineAddress); + DPRINTF(RubySlicc, "Victim for %s L2_Repl(3) is %s\n", in_msg.LineAddress, victim); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), + TBEs.lookup(victim)); + } + } + } else { + if (presentOrAvail2(in_msg.LineAddress)) { + if (presentOrAvailD0(in_msg.LineAddress)) { + if (in_msg.Type == RubyRequestType:LD) { + trigger(Event:C0_Load_L1miss, in_msg.LineAddress, + cache_entry, tbe); + } 
else { + trigger(Event:C0_Store_L1miss, in_msg.LineAddress, + cache_entry, tbe); + } + } else { + Addr victim := L1D0cache.cacheProbe(in_msg.LineAddress); + DPRINTF(RubySlicc, "Victim for %s L1D0_Repl is %s\n", in_msg.LineAddress, victim); + trigger(Event:L1D0_Repl, victim, getCacheEntry(victim), + TBEs.lookup(victim)); + } + } else { + Addr victim := L2cache.cacheProbe(in_msg.LineAddress); + DPRINTF(RubySlicc, "Victim for %s L2_Repl(4) is %s\n", in_msg.LineAddress, victim); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), + TBEs.lookup(victim)); + } + } + } + } + } + } + } + + + // ACTIONS + action(ii_invIcache, "ii", desc="invalidate iCache") { + if (L1Icache.isTagPresent(address)) { + L1Icache.deallocate(address); + } + } + + action(i0_invCluster, "i0", desc="invalidate cluster 0") { + if (L1D0cache.isTagPresent(address)) { + L1D0cache.deallocate(address); + } + } + + action(i1_invCluster, "i1", desc="invalidate cluster 1") { + if (L1D1cache.isTagPresent(address)) { + L1D1cache.deallocate(address); + } + } + + action(ib_invBothClusters, "ib", desc="invalidate both clusters") { + if (L1D0cache.isTagPresent(address)) { + L1D0cache.deallocate(address); + } + if (L1D1cache.isTagPresent(address)) { + L1D1cache.deallocate(address); + } + } + + action(i2_invL2, "i2", desc="invalidate L2") { + if(is_valid(cache_entry)) { + L2cache.deallocate(address); + } + unset_cache_entry(); + } + + action(n_issueRdBlk, "n", desc="Issue RdBlk") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlk; + out_msg.Requestor := machineID; + out_msg.Destination.add(getPeer(machineID)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := curCycle(); + } + } + + action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlkM; + out_msg.Requestor := 
machineID; + out_msg.Destination.add(getPeer(machineID)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := curCycle(); + } + } + + action(nMs_issueRdBlkMSinked, "nMs", desc="Issue RdBlkM with CtoDSinked") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlkM; + out_msg.Requestor := machineID; + out_msg.Destination.add(getPeer(machineID)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.CtoDSinked := true; + } + } + + action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlkS; + out_msg.Requestor := machineID; + out_msg.Destination.add(getPeer(machineID)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := curCycle(); + } + } + + action(nSs_issueRdBlkSSinked, "nSs", desc="Issue RdBlkS with CtoDSinked") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:RdBlkS; + out_msg.Requestor := machineID; + out_msg.Destination.add(getPeer(machineID)); + out_msg.CtoDSinked := true; + out_msg.MessageSize := MessageSizeType:Request_Control; + } + } + + action(vd_victim, "vd", desc="Victimize M/O L2 Data") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + assert(is_valid(cache_entry)); + out_msg.DataBlk := cache_entry.DataBlk; + assert(cache_entry.Dirty); + out_msg.Destination.add(getPeer(machineID)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Type := CoherenceRequestType:VicDirty; + out_msg.InitialRequestTime := curCycle(); + if (cache_entry.CacheState == State:O) { + out_msg.Shared := true; + } else { + out_msg.Shared := false; + } + } + } + + action(vc_victim, "vc", desc="Victimize E/S L2 Data") 
{ + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + out_msg.Destination.add(getPeer(machineID)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Type := CoherenceRequestType:VicClean; + out_msg.InitialRequestTime := curCycle(); + if (cache_entry.CacheState == State:S) { + out_msg.Shared := true; + } else { + out_msg.Shared := false; + } + } + } + + // Could send these two directly to dir if we made a new out network on channel 0 + action(vdf_victimForce, "vdf", desc="Victimize M/O L2 Data") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + assert(is_valid(cache_entry)); + out_msg.DataBlk := cache_entry.DataBlk; + assert(cache_entry.Dirty); + out_msg.Destination.add(getPeer(machineID)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Type := CoherenceRequestType:VicDirty; + out_msg.InitialRequestTime := curCycle(); + if (cache_entry.CacheState == State:O) { + out_msg.Shared := true; + } else { + out_msg.Shared := false; + } + out_msg.Private := true; + } + } + + action(vcf_victimForce, "vcf", desc="Victimize E/S L2 Data") { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Requestor := machineID; + out_msg.Destination.add(getPeer(machineID)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Type := CoherenceRequestType:VicClean; + out_msg.InitialRequestTime := curCycle(); + if (cache_entry.CacheState == State:S) { + out_msg.Shared := true; + } else { + out_msg.Shared := false; + } + out_msg.Private := true; + } + } + + action(a0_allocateL1D, "a0", desc="Allocate L1D0 Block") { + if (L1D0cache.isTagPresent(address) == false) { + L1D0cache.allocateVoid(address, new Entry); + } + } + + action(a1_allocateL1D, "a1", desc="Allocate L1D1 Block") { + if (L1D1cache.isTagPresent(address) == false) { + 
L1D1cache.allocateVoid(address, new Entry); + } + } + + action(ai_allocateL1I, "ai", desc="Allocate L1I Block") { + if (L1Icache.isTagPresent(address) == false) { + L1Icache.allocateVoid(address, new Entry); + } + } + + action(a2_allocateL2, "a2", desc="Allocate L2 Block") { + if (is_invalid(cache_entry)) { + set_cache_entry(L2cache.allocate(address, new Entry)); + } + } + + action(t_allocateTBE, "t", desc="allocate TBE Entry") { + check_allocate(TBEs); + assert(is_valid(cache_entry)); + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs + tbe.Dirty := cache_entry.Dirty; + tbe.Shared := false; + } + + action(d_deallocateTBE, "d", desc="Deallocate TBE") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") { + mandatoryQueue_in.dequeue(clockEdge()); + } + + action(pr_popResponseQueue, "pr", desc="Pop Response Queue") { + responseToCore_in.dequeue(clockEdge()); + } + + action(pt_popTriggerQueue, "pt", desc="Pop Trigger Queue") { + triggerQueue_in.dequeue(clockEdge()); + } + + action(pp_popProbeQueue, "pp", desc="pop probe queue") { + probeNetwork_in.dequeue(clockEdge()); + } + + action(il0_loadDone, "il0", desc="Cluster 0 i load done") { + Entry entry := getICacheEntry(address); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + assert(is_valid(entry)); + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + sequencer.readCallback(address, + l2entry.DataBlk, + true, + testAndClearLocalHit(entry)); + } + + action(il1_loadDone, "il1", desc="Cluster 1 i load done") { + Entry entry := getICacheEntry(address); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + assert(is_valid(entry)); + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + sequencer1.readCallback(address, + l2entry.DataBlk, + true, + 
testAndClearLocalHit(entry)); + } + + action(l0_loadDone, "l0", desc="Cluster 0 load done") { + Entry entry := getL1CacheEntry(address, 0); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + assert(is_valid(entry)); + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + sequencer.readCallback(address, + l2entry.DataBlk, + true, + testAndClearLocalHit(entry)); + } + + action(l1_loadDone, "l1", desc="Cluster 1 load done") { + Entry entry := getL1CacheEntry(address, 1); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + assert(is_valid(entry)); + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + sequencer1.readCallback(address, + l2entry.DataBlk, + true, + testAndClearLocalHit(entry)); + } + + action(xl0_loadDone, "xl0", desc="Cluster 0 load done") { + peek(responseToCore_in, ResponseMsg) { + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + DPRINTF(ProtocolTrace, "CP Load Done 0 -- address %s, data: %s\n", + address, l2entry.DataBlk); + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + assert(is_valid(l2entry)); + sequencer.readCallback(address, + l2entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + } + } + + action(xl1_loadDone, "xl1", desc="Cluster 1 load done") { + peek(responseToCore_in, ResponseMsg) { + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + // L2 supplies data (functional accesses only look in L2, ok because L1 + // 
writes through to L2) + assert(is_valid(l2entry)); + sequencer1.readCallback(address, + l2entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + } + } + + action(xi0_loadDone, "xi0", desc="Cluster 0 i-load done") { + peek(responseToCore_in, ResponseMsg) { + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + assert(is_valid(l2entry)); + sequencer.readCallback(address, + l2entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + } + } + + action(xi1_loadDone, "xi1", desc="Cluster 1 i-load done") { + peek(responseToCore_in, ResponseMsg) { + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + Entry l2entry := getCacheEntry(address); // Used for functional accesses + // L2 supplies data (functional accesses only look in L2, ok because L1 + // writes through to L2) + assert(is_valid(l2entry)); + sequencer1.readCallback(address, + l2entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + } + } + + action(s0_storeDone, "s0", desc="Cluster 0 store done") { + Entry entry := getL1CacheEntry(address, 0); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + sequencer.writeCallback(address, + cache_entry.DataBlk, + true, + testAndClearLocalHit(entry)); + cache_entry.Dirty := true; + entry.DataBlk := cache_entry.DataBlk; + entry.Dirty := true; + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + } + + 
action(s1_storeDone, "s1", desc="Cluster 1 store done") { + Entry entry := getL1CacheEntry(address, 1); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + sequencer1.writeCallback(address, + cache_entry.DataBlk, + true, + testAndClearLocalHit(entry)); + cache_entry.Dirty := true; + entry.Dirty := true; + entry.DataBlk := cache_entry.DataBlk; + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + } + + action(xs0_storeDone, "xs0", desc="Cluster 0 store done") { + peek(responseToCore_in, ResponseMsg) { + Entry entry := getL1CacheEntry(address, 0); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + sequencer.writeCallback(address, + cache_entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + cache_entry.Dirty := true; + entry.Dirty := true; + entry.DataBlk := cache_entry.DataBlk; + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + } + } + + action(xs1_storeDone, "xs1", desc="Cluster 1 store done") { + peek(responseToCore_in, ResponseMsg) { + Entry entry := getL1CacheEntry(address, 1); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) || + (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache)); + sequencer1.writeCallback(address, + cache_entry.DataBlk, + false, + machineIDToMachineType(in_msg.Sender), + in_msg.InitialRequestTime, + in_msg.ForwardRequestTime, + in_msg.ProbeRequestStartTime); + cache_entry.Dirty := true; + entry.Dirty := true; + entry.DataBlk := cache_entry.DataBlk; + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + } + } + + action(forward_eviction_to_cpu0, "fec0", desc="sends eviction information to processor0") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the 
CPU\n", address); + sequencer.evictionCallback(address); + } + } + + action(forward_eviction_to_cpu1, "fec1", desc="sends eviction information to processor1") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); + sequencer1.evictionCallback(address); + } + } + + action(ci_copyL2ToL1, "ci", desc="copy L2 data to L1") { + Entry entry := getICacheEntry(address); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + entry.Dirty := cache_entry.Dirty; + entry.DataBlk := cache_entry.DataBlk; + entry.FromL2 := true; + } + + action(c0_copyL2ToL1, "c0", desc="copy L2 data to L1") { + Entry entry := getL1CacheEntry(address, 0); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + entry.Dirty := cache_entry.Dirty; + entry.DataBlk := cache_entry.DataBlk; + entry.FromL2 := true; + } + + action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") { + peek(responseToCore_in, ResponseMsg) { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:StaleNotif; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Response_Control; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(c1_copyL2ToL1, "c1", desc="copy L2 data to L1") { + Entry entry := getL1CacheEntry(address, 1); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + entry.Dirty := cache_entry.Dirty; + entry.DataBlk := cache_entry.DataBlk; + entry.FromL2 := true; + } + + action(fi_L2ToL1, "fi", desc="L2 to L1 inst fill") { + enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L2_to_L1; + out_msg.Dest := CacheId:L1I; + } + } + + action(f0_L2ToL1, "f0", desc="L2 to L1 data fill") { + enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L2_to_L1; + 
out_msg.Dest := CacheId:L1D0; + } + } + + action(f1_L2ToL1, "f1", desc="L2 to L1 data fill") { + enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L2_to_L1; + out_msg.Dest := CacheId:L1D1; + } + } + + action(wi_writeIcache, "wi", desc="write data to icache (and l2)") { + peek(responseToCore_in, ResponseMsg) { + Entry entry := getICacheEntry(address); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + entry.DataBlk := in_msg.DataBlk; + entry.Dirty := in_msg.Dirty; + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(w0_writeDcache, "w0", desc="write data to dcache 0 (and l2)") { + peek(responseToCore_in, ResponseMsg) { + Entry entry := getL1CacheEntry(address, 0); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + entry.DataBlk := in_msg.DataBlk; + entry.Dirty := in_msg.Dirty; + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(w1_writeDcache, "w1", desc="write data to dcache 1 (and l2)") { + peek(responseToCore_in, ResponseMsg) { + Entry entry := getL1CacheEntry(address, 1); + assert(is_valid(entry)); + assert(is_valid(cache_entry)); + entry.DataBlk := in_msg.DataBlk; + entry.Dirty := in_msg.Dirty; + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(wb_data, "wb", desc="write back data") { + peek(responseToCore_in, ResponseMsg) { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUData; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + if (tbe.Shared) { + out_msg.NbReqShared := true; + } else { + out_msg.NbReqShared := false; + } + out_msg.State := CoherenceState:Shared; // faux info + out_msg.MessageSize := MessageSizeType:Writeback_Data; + DPRINTF(RubySlicc, 
"%s\n", out_msg); + } + } + } + + action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + out_msg.Dirty := false; + out_msg.Hit := false; + out_msg.Ntsl := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.isValid := isValid(address); + } + } + + action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + out_msg.Dirty := false; + out_msg.Ntsl := true; + out_msg.Hit := false; + APPEND_TRANSITION_COMMENT("Setting Ms"); + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.isValid := isValid(address); + } + } + + action(ph_sendProbeResponseHit, "ph", desc="send probe ack PrbShrData, no data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? 
probably not for multisocket + assert(addressInCore(address) || is_valid(tbe)); + out_msg.Dirty := false; // only true if sending back data i think + out_msg.Hit := true; + out_msg.Ntsl := false; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.isValid := isValid(address); + } + } + + action(pb_sendProbeResponseBackprobe, "pb", desc="send probe ack PrbShrData, no data, check for L1 residence") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + if (addressInCore(address)) { + out_msg.Hit := true; + } else { + out_msg.Hit := false; + } + out_msg.Dirty := false; // not sending back data, so def. not dirty + out_msg.Ntsl := false; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.isValid := isValid(address); + } + } + + action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? 
probably not for multisocket + out_msg.DataBlk := cache_entry.DataBlk; + assert(cache_entry.Dirty); + out_msg.Dirty := true; + out_msg.Hit := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.isValid := isValid(address); + } + } + + action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + out_msg.DataBlk := cache_entry.DataBlk; + assert(cache_entry.Dirty); + out_msg.Dirty := true; + out_msg.Hit := true; + APPEND_TRANSITION_COMMENT("Setting Ms"); + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.isValid := isValid(address); + } + } + + action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.DataBlk := tbe.DataBlk; + assert(tbe.Dirty); + out_msg.Dirty := true; + out_msg.Hit := true; + out_msg.State := CoherenceState:NA; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.isValid := isValid(address); + } + } + + action(ra_sendReplAck, "ra", desc="Send ack to r-buf that line is replaced if needed") { + if (is_invalid(tbe) || tbe.AckNeeded) { + enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:InvAck; + out_msg.Requestor := machineID; + out_msg.Destination.add(getPeer(machineID)); + out_msg.MessageSize := MessageSizeType:Request_Control; + } 
+ APPEND_TRANSITION_COMMENT(" Sending ack to r-buf "); + } else { + APPEND_TRANSITION_COMMENT(" NOT Sending ack to r-buf "); + } + } + + action(m_markAckNeeded, "m", desc="Mark TBE to send ack when deallocated") { + assert(is_valid(tbe)); + tbe.AckNeeded := true; + } + + action(mc_cancelWB, "mc", desc="send writeback cancel to L3") { + enqueue(responseNetwork_out, ResponseMsg, issue_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:CPUCancelWB; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(s_setSharedFlip, "s", desc="hit by shared probe, status may be different") { + assert(is_valid(tbe)); + tbe.Shared := true; + } + + action(uu_sendUnblock, "uu", desc="state changed, unblock") { + enqueue(unblockNetwork_out, UnblockMsg, issue_latency) { + out_msg.addr := address; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Unblock_Control; + out_msg.wasValid := isValid(address); + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(sdv_sendDoneValid, "sdv", desc="Request finished, send done ack") { + enqueue(unblockNetwork_out, UnblockMsg, 1) { + out_msg.addr := address; + out_msg.Destination.add(getPeer(machineID)); + out_msg.DoneAck := true; + out_msg.MessageSize := MessageSizeType:Unblock_Control; + if (is_valid(tbe)) { + out_msg.Dirty := tbe.Dirty; + } else if (is_valid(cache_entry)) { + out_msg.Dirty := cache_entry.Dirty; + } else { + out_msg.Dirty := false; + } + out_msg.validToInvalid := false; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(sdi_sendDoneInvalid, "sdi", desc="Request finished, send done ack") { + enqueue(unblockNetwork_out, UnblockMsg, 1) { + out_msg.addr := address; + out_msg.Destination.add(getPeer(machineID)); + out_msg.DoneAck := true; + out_msg.MessageSize := MessageSizeType:Unblock_Control; + if (is_valid(tbe)) { + out_msg.Dirty 
:= tbe.Dirty; + } else if (is_valid(cache_entry)) { + out_msg.Dirty := cache_entry.Dirty; + } else { + out_msg.Dirty := false; + } + out_msg.validToInvalid := true; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + // Miss-profiling actions: each one increments demand_misses on exactly one cache. + action(l10m_profileMiss, "l10m", desc="l10m miss profile") { + ++L1D0cache.demand_misses; + } + + action(l11m_profileMiss, "l11m", desc="l11m miss profile") { + ++L1D1cache.demand_misses; + } + + action(l1im_profileMiss, "l1im", desc="l1im miss profile") { + ++L1Icache.demand_misses; + } + + action(l2m_profileMiss, "l2m", desc="l2m miss profile") { + ++L2cache.demand_misses; + } + + action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") { + probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") { + mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + // END ACTIONS + + // BEGIN TRANSITIONS + + // transitions from base + transition(I, C0_Load_L1miss, I_E0S) {L1D0TagArrayRead, L2TagArrayRead} { + // track misses, if implemented + // since in I state, L2 miss as well + // data load: profile L2 and L1D0 only (matches the C1 transition below) + l2m_profileMiss; + l10m_profileMiss; + a0_allocateL1D; + a2_allocateL2; + i1_invCluster; + ii_invIcache; + n_issueRdBlk; + p_popMandatoryQueue; + } + + transition(I, C1_Load_L1miss, I_E1S) {L1D1TagArrayRead, L2TagArrayRead} { + // track misses, if implemented + // since in I state, L2 miss as well + l2m_profileMiss; + l11m_profileMiss; + a1_allocateL1D; + a2_allocateL2; + i0_invCluster; + ii_invIcache; + n_issueRdBlk; + p_popMandatoryQueue; + } + + transition(I, Ifetch0_L1miss, S0) {L1ITagArrayRead, L2TagArrayRead} { + // track misses, if implemented + // L2 miss as well + // instruction fetch: profile L2 and L1I only, no L1D miss is counted + l2m_profileMiss; + l1im_profileMiss; + ai_allocateL1I; + a2_allocateL2; + ib_invBothClusters; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + transition(I, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} { + // track misses, if 
implemented + // L2 miss as well + l2m_profileMiss; + l1im_profileMiss; + ai_allocateL1I; + a2_allocateL2; + ib_invBothClusters; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + transition(I, C0_Store_L1miss, I_M0) {L1D0TagArrayRead,L2TagArrayRead} { + l2m_profileMiss; + l10m_profileMiss; + a0_allocateL1D; + a2_allocateL2; + i1_invCluster; + ii_invIcache; + nM_issueRdBlkM; + p_popMandatoryQueue; + } + + transition(I, C1_Store_L1miss, I_M1) {L1D0TagArrayRead, L2TagArrayRead} { + l2m_profileMiss; + l11m_profileMiss; + a1_allocateL1D; + a2_allocateL2; + i0_invCluster; + ii_invIcache; + nM_issueRdBlkM; + p_popMandatoryQueue; + } + + transition(S, C0_Load_L1miss, S_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(S, C1_Load_L1miss, S_F1) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(S, Ifetch0_L1miss, Si_F0) {L1ITagArrayRead,L2TagArrayRead, L2DataArrayRead} { + l1im_profileMiss; + ai_allocateL1I; + fi_L2ToL1; + p_popMandatoryQueue; + } + + transition(S, Ifetch1_L1miss, Si_F1) {L1ITagArrayRead, L2TagArrayRead, L2DataArrayRead} { + l1im_profileMiss; + ai_allocateL1I; + fi_L2ToL1; + p_popMandatoryQueue; + } + + transition({S}, {C0_Store_L1hit, C0_Store_L1miss}, S_M0) {L1D0TagArrayRead, L2TagArrayRead}{ + l2m_profileMiss; + l10m_profileMiss; + a0_allocateL1D; + i1_invCluster; + ii_invIcache; + nM_issueRdBlkM; + p_popMandatoryQueue; + } + + transition({S}, {C1_Store_L1hit, C1_Store_L1miss}, S_M1) {L1D1TagArrayRead,L2TagArrayRead} { + l2m_profileMiss; + l11m_profileMiss; + a1_allocateL1D; + i0_invCluster; + ii_invIcache; + nM_issueRdBlkM; + p_popMandatoryQueue; + } + transition(Es, C0_Load_L1miss, Es_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} { // can this be folded with S_F? 
+ l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(Es, C1_Load_L1miss, Es_F1) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} { // can this be folded with S_F? + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(Es, Ifetch0_L1miss, S0) {L1ITagArrayRead, L2TagArrayRead} { + l1im_profileMiss; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + ib_invBothClusters; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + transition(Es, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} { + l1im_profileMiss; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + ib_invBothClusters; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + // THESE SHOULD NOT BE INSTANTANEOUS BUT OH WELL FOR NOW + transition(Es, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayWrite,L1D0TagArrayRead, L2TagArrayRead, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} { + a0_allocateL1D; + i1_invCluster; + s0_storeDone; // instantaneous L1/L2 dirty - no writethrough delay + p_popMandatoryQueue; + } + + transition(Es, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} { + a1_allocateL1D; + i0_invCluster; + s1_storeDone; + p_popMandatoryQueue; + } + + // E0 base transitions: loads start an L2-to-L1 fill (f0_L2ToL1 / f1_L2ToL1) and pop the mandatory queue + transition(E0, C0_Load_L1miss, E0_F) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + // NOTE(review): this C1 load requests L1D0TagArrayRead while allocating in L1D1 - confirm whether L1D1TagArrayRead was intended + transition(E0, C1_Load_L1miss, E0_Es) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(E0, Ifetch0_L1miss, S0) {L2TagArrayRead, L1ITagArrayRead} { + l2m_profileMiss; // permissions miss, still issue RdBlkS + l1im_profileMiss; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + i0_invCluster; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + transition(E0, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead } { + l2m_profileMiss; // 
permissions miss, still issue RdBlkS + l1im_profileMiss; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + i0_invCluster; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + transition(E0, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} { + a0_allocateL1D; + s0_storeDone; + p_popMandatoryQueue; + } + + transition(E0, C1_Store_L1miss, M1) {L1D0TagArrayRead, L1D0TagArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} { + a1_allocateL1D; + l11m_profileMiss; + i0_invCluster; + s1_storeDone; + p_popMandatoryQueue; + } + + transition(E1, C1_Load_L1miss, E1_F) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} { + a1_allocateL1D; + l11m_profileMiss; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(E1, C0_Load_L1miss, E1_Es) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} { + a0_allocateL1D; + l10m_profileMiss; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(E1, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead} { + l2m_profileMiss; // permissions miss, still issue RdBlkS + l1im_profileMiss; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + i1_invCluster; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + transition(E1, Ifetch0_L1miss, S0) {L2TagArrayRead,L1ITagArrayRead} { + l2m_profileMiss; // permissions miss, still issue RdBlkS + l1im_profileMiss; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + i1_invCluster; + nS_issueRdBlkS; + p_popMandatoryQueue; + } + + transition(E1, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} { + a1_allocateL1D; + s1_storeDone; + p_popMandatoryQueue; + } + + transition(E1, C0_Store_L1miss, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} { + l10m_profileMiss; + a0_allocateL1D; + i1_invCluster; + s0_storeDone; + p_popMandatoryQueue; + } + + transition({O}, {C0_Store_L1hit, C0_Store_L1miss}, O_M0) 
{L1D0TagArrayRead, L2TagArrayRead} { + l2m_profileMiss; // permissions miss, still issue CtoD + l10m_profileMiss; + a0_allocateL1D; + i1_invCluster; + ii_invIcache; + nM_issueRdBlkM; + p_popMandatoryQueue; + } + + transition({O}, {C1_Store_L1hit, C1_Store_L1miss}, O_M1) {L1D1TagArrayRead, L2TagArrayRead} { + l2m_profileMiss; // permissions miss, still issue RdBlkM + l11m_profileMiss; + a1_allocateL1D; + i0_invCluster; + ii_invIcache; + nM_issueRdBlkM; + p_popMandatoryQueue; + } + + transition(O, C0_Load_L1miss, O_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(O, C1_Load_L1miss, O_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(Ms, C0_Load_L1miss, Ms_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(Ms, C1_Load_L1miss, Ms_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + // Instruction fetch to a line in Ms/M0/M1/O: allocate a TBE and issue vd_victim; + // counted as an L2 (permissions) miss and an L1I miss. + transition({Ms, M0, M1, O}, Ifetch0_L1miss, MO_S0) {L1ITagArrayRead, L2TagArrayRead} { + l2m_profileMiss; // permissions miss + l1im_profileMiss; + ai_allocateL1I; + t_allocateTBE; + ib_invBothClusters; + vd_victim; +// i2_invL2; + p_popMandatoryQueue; + } + + // Same as the Ifetch0 case above: an instruction fetch profiles the L1I, not an L1D. + transition({Ms, M0, M1, O}, Ifetch1_L1miss, MO_S1) {L1ITagArrayRead, L2TagArrayRead} { + l2m_profileMiss; // permissions miss + l1im_profileMiss; + ai_allocateL1I; + t_allocateTBE; + ib_invBothClusters; + vd_victim; +// i2_invL2; + p_popMandatoryQueue; + } + + transition(Ms, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} { + a0_allocateL1D; + i1_invCluster; + s0_storeDone; + p_popMandatoryQueue; + } + + transition(Ms, {C1_Store_L1hit, C1_Store_L1miss}, 
M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} { + a1_allocateL1D; + i0_invCluster; + s1_storeDone; + p_popMandatoryQueue; + } + + transition(M0, C0_Load_L1miss, M0_F) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(M0, C1_Load_L1miss, M0_Ms) {L2TagArrayRead, L2DataArrayRead,L1D1TagArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(M0, {C0_Store_L1hit, C0_Store_L1miss}) {L1D0TagArrayRead, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayRead} { + a0_allocateL1D; + s0_storeDone; + p_popMandatoryQueue; + } + + transition(M0, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayRead, L2TagArrayWrite} { + a1_allocateL1D; + i0_invCluster; + s1_storeDone; + p_popMandatoryQueue; + } + + transition(M1, C0_Load_L1miss, M1_Ms) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(M1, C1_Load_L1miss, M1_F) {L1D1TagArrayRead L2TagArrayRead, L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(M1, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} { + a0_allocateL1D; + i1_invCluster; + s0_storeDone; + p_popMandatoryQueue; + } + + transition(M1, {C1_Store_L1hit, C1_Store_L1miss}) {L1D1TagArrayRead, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite} { + a1_allocateL1D; + s1_storeDone; + p_popMandatoryQueue; + } + + // end transitions from base + + // Begin simple hit transitions + transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es, + Ms_F1, M0_Ms}, C0_Load_L1hit) {L1D0TagArrayRead, L1D0DataArrayRead} { + // track hits, if 
implemented + l0_loadDone; + p_popMandatoryQueue; + } + + transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es, + Ms_F0, M1_Ms}, C1_Load_L1hit) {L1D1TagArrayRead, L1D1DataArrayRead} { + // track hits, if implemented + l1_loadDone; + p_popMandatoryQueue; + } + + transition({S, S_C, S_F0, S_F1, S_F}, Ifetch0_L1hit) {L1ITagArrayRead, L1IDataArrayRead} { + // track hits, if implemented + il0_loadDone; + p_popMandatoryQueue; + } + + // an instruction-fetch hit only reads the L1I tag and data arrays (same as Ifetch0 above) + transition({S, S_C, S_F0, S_F1, S_F}, Ifetch1_L1hit) {L1ITagArrayRead, L1IDataArrayRead} { + // track hits, if implemented + il1_loadDone; + p_popMandatoryQueue; + } + + // end simple hit transitions + + // Transitions from transient states + + // recycles + // each transition below leaves the state unchanged and only recycles the mandatory queue + transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES, + IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F, + E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, C0_Load_L1hit) {} { + zz_recycleMandatoryQueue; + } + + transition({IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M1, + O_M1, S0, S1, I_C, S0_C, S1_C, S_C}, C0_Load_L1miss) {} { + zz_recycleMandatoryQueue; + } + + transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES, + IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F, + E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, C1_Load_L1hit) {} { + zz_recycleMandatoryQueue; + } + + transition({IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M0, + O_M0, S0, S1, I_C, S0_C, S1_C, S_C}, C1_Load_L1miss) {} { + zz_recycleMandatoryQueue; + } + + transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, {Ifetch0_L1hit, Ifetch1_L1hit}) {} { + zz_recycleMandatoryQueue; + } + + transition({I_M0, I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES, + IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, ES_I, MO_I, S_F0, S_F1, S_F, + O_F0, O_F1, O_F, S_M0, S_M1, O_M0, O_M1, Es_F0, Es_F1, Es_F, E0_F, + E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_F, M1_Ms, I_C, + S_C}, 
{Ifetch0_L1miss, Ifetch1_L1miss}) {} { + zz_recycleMandatoryQueue; + } + + transition({I_E1S, IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, S_F1, O_F1, + Si_F0, Si_F1, S_M1, O_M1, S0, S1, Es_F1, E1_F, E0_Es, Ms_F1, M0_Ms, + M1_F, I_C, S0_C, S1_C, S_C}, {C0_Store_L1miss}) {} { + zz_recycleMandatoryQueue; + } + + transition({I_E0S, IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1 S_F0, O_F0, + Si_F0, Si_F1, S_M0, O_M0, S0, S1, Es_F0, E0_F, E1_Es, Ms_F0, M0_F, + M1_Ms, I_C, S0_C, S1_C, S_C}, {C1_Store_L1miss}) {} { + zz_recycleMandatoryQueue; + } + + transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES, + IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M0, O_M0, Es_F0, Es_F1, Es_F, E0_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_Ms}, {C0_Store_L1hit}) {} { + zz_recycleMandatoryQueue; + } + + transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES, + IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M1, + O_M1, Es_F0, Es_F1, Es_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, + M0_Ms, M1_F, M1_Ms}, {C1_Store_L1hit}) {} { + zz_recycleMandatoryQueue; + } + + transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES, + IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F, + E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, L1D0_Repl) {} { + zz_recycleMandatoryQueue; + } + + transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES, + IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F, + E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, L1D1_Repl) {} { + zz_recycleMandatoryQueue; + } + + transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, L1I_Repl) {} { + zz_recycleMandatoryQueue; + } + + transition({S_C, S0_C, S1_C, S0, S1, Si_F0, Si_F1, I_M0, I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, S_M0, O_M0, S_M1, O_M1, Es_F0, Es_F1, Es_F, E0_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, 
M1_F, M1_Ms, MO_S0, MO_S1, IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, L2_Repl) {} { + zz_recycleMandatoryQueue; + } + + transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, {NB_AckS, + PrbInvData, PrbInvDataDemand, PrbInv, PrbShrData, PrbShrDataDemand}) {} { + zz_recycleMandatoryQueue; // these should be resolved soon, but I didn't want to add more states, though technically they could be solved now, and probes really could be solved but i don't think it's really necessary. + } + + transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES}, NB_AckE) {} { + zz_recycleMandatoryQueue; // these should be resolved soon, but I didn't want to add more states, though technically they could be solved now, and probes really could be solved but i don't think it's really necessary. + } + + transition({E0_Es, E1_F, Es_F1}, C0_Load_L1miss, Es_F) {L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(S_F1, C0_Load_L1miss, S_F) {L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(O_F1, C0_Load_L1miss, O_F) {L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition({Ms_F1, M0_Ms, M1_F}, C0_Load_L1miss, Ms_F) {L2DataArrayRead} { + l10m_profileMiss; + a0_allocateL1D; + f0_L2ToL1; + p_popMandatoryQueue; + } + + transition(I_M0, C1_Load_L1miss, I_M0Ms){ + l11m_profileMiss; + l2m_profileMiss; + a1_allocateL1D; + p_popMandatoryQueue; + } + + transition(I_M1, C0_Load_L1miss, I_M1Ms){ + l10m_profileMiss; + l2m_profileMiss; + a0_allocateL1D; + p_popMandatoryQueue; + } + + transition(I_M0, C1_Store_L1miss, I_M0M1) { + l11m_profileMiss; + l2m_profileMiss; + a1_allocateL1D; + p_popMandatoryQueue; + } + + transition(I_M1, C0_Store_L1miss, I_M1M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L2TagArrayRead, L2TagArrayWrite} { + l2m_profileMiss; + a0_allocateL1D; + p_popMandatoryQueue; + } + + transition(I_E0S, 
C1_Load_L1miss, I_ES) {} { + l2m_profileMiss; + l11m_profileMiss; + a1_allocateL1D; + p_popMandatoryQueue; + } + + transition(I_E1S, C0_Load_L1miss, I_ES) {} { + // no new request is issued here; count one L2 miss and one L1D0 miss (mirrors the I_E0S case above) + l2m_profileMiss; + l10m_profileMiss; + a0_allocateL1D; + p_popMandatoryQueue; + } + + transition({E1_Es, E0_F, Es_F0}, C1_Load_L1miss, Es_F) {L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(S_F0, C1_Load_L1miss, S_F) { L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition(O_F0, C1_Load_L1miss, O_F) {L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + transition({Ms_F0, M1_Ms, M0_F}, C1_Load_L1miss, Ms_F) {L2DataArrayRead} { + l11m_profileMiss; + a1_allocateL1D; + f1_L2ToL1; + p_popMandatoryQueue; + } + + // L1 replacements: invalidate only the affected L1 and keep the current state + transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es, Ms_F1, M0_Ms}, L1D0_Repl) {L1D0TagArrayRead} { + i0_invCluster; + } + + transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es, Ms_F0, M1_Ms}, L1D1_Repl) {L1D1TagArrayRead} { + i1_invCluster; + } + + transition({S, S_C, S_F0, S_F1}, L1I_Repl) {L1ITagArrayRead} { + ii_invIcache; + } + + // L2 replacements: allocate a TBE and issue a victim request (vc_victim from S/E0/E1/Es, vd_victim from Ms/M0/M1/O) + transition({S, E0, E1, Es}, L2_Repl, ES_I) {L2TagArrayRead,L1D0TagArrayRead, L1D1TagArrayRead, L1ITagArrayRead} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + t_allocateTBE; + vc_victim; + ib_invBothClusters; + i2_invL2; + ii_invIcache; + } + + transition({Ms, M0, M1, O}, L2_Repl, MO_I) {L2TagArrayRead, L2TagArrayWrite, L1D0TagArrayRead, L1D1TagArrayRead} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + t_allocateTBE; + vd_victim; + i2_invL2; + ib_invBothClusters; // nothing will happen for D0 on M1, vice versa + } + + transition(S0, NB_AckS, S) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + wi_writeIcache; + xi0_loadDone; + uu_sendUnblock; + sdv_sendDoneValid; + pr_popResponseQueue; + } + + 
transition(S1, NB_AckS, S) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + wi_writeIcache; + xi1_loadDone; + sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(S0_C, NB_AckS, S_C) { L1IDataArrayWrite,L2DataArrayWrite} { + // does not need send done since the rdblks was "sinked" + wi_writeIcache; + xi0_loadDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(S1_C, NB_AckS, S_C) { L1D1DataArrayWrite,L2DataArrayWrite} { + wi_writeIcache; + xi1_loadDone; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_M0, NB_AckM, M0) { L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + w0_writeDcache; + xs0_storeDone; + sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} { + w1_writeDcache; + xs1_storeDone; + sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + // THESE MO->M1 should not be instantaneous but oh well for now. 
+ transition(I_M0M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} { + w0_writeDcache; + xs0_storeDone; + sdv_sendDoneValid; + uu_sendUnblock; + i0_invCluster; + s1_storeDone; + pr_popResponseQueue; + } + + transition(I_M1M0, NB_AckM, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} { + w1_writeDcache; + xs1_storeDone; + sdv_sendDoneValid; + uu_sendUnblock; + i1_invCluster; + s0_storeDone; + pr_popResponseQueue; + } + + // Above should be more like this, which has some latency to xfer to L1 + transition(I_M0Ms, NB_AckM, M0_Ms) {L1D0DataArrayWrite,L2DataArrayWrite} { + w0_writeDcache; + xs0_storeDone; + sdv_sendDoneValid; + uu_sendUnblock; + f1_L2ToL1; + pr_popResponseQueue; + } + + transition(I_M1Ms, NB_AckM, M1_Ms) {L1D1DataArrayWrite,L2DataArrayWrite} { + w1_writeDcache; + xs1_storeDone; + sdv_sendDoneValid; + uu_sendUnblock; + f0_L2ToL1; + pr_popResponseQueue; + } + + // NB_AckE / NB_AckS completions for outstanding loads: write the L1D(s), finish the load(s), send done + unblock, pop the response + transition(I_E0S, NB_AckE, E0) {L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + w0_writeDcache; + xl0_loadDone; + sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_E1S, NB_AckE, E1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + w1_writeDcache; + xl1_loadDone; + sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_ES, NB_AckE, Es) {L1D1DataArrayWrite, L1D1TagArrayWrite, L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite } { + w0_writeDcache; + xl0_loadDone; + w1_writeDcache; + xl1_loadDone; + sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_E0S, NB_AckS, S) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + w0_writeDcache; + xl0_loadDone; + sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_E1S, NB_AckS, S) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} { + w1_writeDcache; + xl1_loadDone; + 
sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(I_ES, NB_AckS, S) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} { + w0_writeDcache; + xl0_loadDone; + w1_writeDcache; + xl1_loadDone; + sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(S_F0, L2_to_L1D0, S) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(S_F1, L2_to_L1D1, S) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(Si_F0, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + ci_copyL2ToL1; + il0_loadDone; + pt_popTriggerQueue; + } + + transition(Si_F1, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + ci_copyL2ToL1; + il1_loadDone; + pt_popTriggerQueue; + } + + transition(S_F, L2_to_L1D0, S_F1) { L1D0DataArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(S_F, L2_to_L1D1, S_F0) { L1D1DataArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(O_F0, L2_to_L1D0, O) { L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(O_F1, L2_to_L1D1, O) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(O_F, L2_to_L1D0, O_F1) { L1D0DataArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(O_F, L2_to_L1D1, O_F0) { L1D1DataArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(M1_F, L2_to_L1D1, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + 
c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(M0_F, L2_to_L1D0, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(Ms_F0, L2_to_L1D0, Ms) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(Ms_F1, L2_to_L1D1, Ms) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(Ms_F, L2_to_L1D0, Ms_F1) {L1D0DataArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + // this fill copies into cluster 1's L1D, so request the L1D1 data array (mirrors the L1D0 case above) + transition(Ms_F, L2_to_L1D1, Ms_F0) {L1D1DataArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(M1_Ms, L2_to_L1D0, Ms) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(M0_Ms, L2_to_L1D1, Ms) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(Es_F0, L2_to_L1D0, Es) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(Es_F1, L2_to_L1D1, Es) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(Es_F, L2_to_L1D0, Es_F1) {L2TagArrayRead, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(Es_F, L2_to_L1D1, Es_F0) {L2TagArrayRead, L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(E0_F, L2_to_L1D0, E0) {L2TagArrayRead, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(E1_F, L2_to_L1D1, E1) {L2TagArrayRead, 
L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(E1_Es, L2_to_L1D0, Es) {L2TagArrayRead, L2DataArrayRead} { + c0_copyL2ToL1; + l0_loadDone; + pt_popTriggerQueue; + } + + transition(E0_Es, L2_to_L1D1, Es) {L2TagArrayRead, L2DataArrayRead} { + c1_copyL2ToL1; + l1_loadDone; + pt_popTriggerQueue; + } + + transition(IF_E0S, L2_to_L1D0, I_E0S) {} { + pt_popTriggerQueue; + } + + transition(IF_E1S, L2_to_L1D1, I_E1S) {} { + pt_popTriggerQueue; + } + + transition(IF_ES, L2_to_L1D0, IF1_ES) {} { + pt_popTriggerQueue; + } + + transition(IF_ES, L2_to_L1D1, IF0_ES) {} { + pt_popTriggerQueue; + } + + transition(IF0_ES, L2_to_L1D0, I_ES) {} { + pt_popTriggerQueue; + } + + transition(IF1_ES, L2_to_L1D1, I_ES) {} { + pt_popTriggerQueue; + } + + transition(F_S0, L2_to_L1I, S0) {} { + pt_popTriggerQueue; + } + + transition(F_S1, L2_to_L1I, S1) {} { + pt_popTriggerQueue; + } + + transition({S_M0, O_M0}, NB_AckM, M0) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + xs0_storeDone; + sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition({S_M1, O_M1}, NB_AckM, M1) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} { + xs1_storeDone; + sdv_sendDoneValid; + uu_sendUnblock; + pr_popResponseQueue; + } + + transition(MO_I, NB_AckWB, I) {L2TagArrayWrite} { + wb_data; + ra_sendReplAck; + sdi_sendDoneInvalid; + d_deallocateTBE; + pr_popResponseQueue; + } + + transition(ES_I, NB_AckWB, I) {L2TagArrayWrite} { + wb_data; + ra_sendReplAck; + sdi_sendDoneInvalid; + d_deallocateTBE; + pr_popResponseQueue; + } + + transition(MO_S0, NB_AckWB, S0) {L2TagArrayWrite} { + wb_data; + i2_invL2; + a2_allocateL2; + sdv_sendDoneValid; + nS_issueRdBlkS; + d_deallocateTBE; // FOO + pr_popResponseQueue; + } + + transition(MO_S1, NB_AckWB, S1) {L2TagArrayWrite} { + wb_data; + i2_invL2; + a2_allocateL2; + sdv_sendDoneValid; + nS_issueRdBlkS; + d_deallocateTBE; // FOO + pr_popResponseQueue; + } + + 
// Writeback cancel "ack" + transition(I_C, NB_AckWB, I) {L2TagArrayWrite} { + ss_sendStaleNotification; + sdi_sendDoneInvalid; + d_deallocateTBE; + pr_popResponseQueue; + } + + transition(S0_C, NB_AckWB, S0) {L2TagArrayWrite} { + ss_sendStaleNotification; + sdv_sendDoneValid; + pr_popResponseQueue; + } + + transition(S1_C, NB_AckWB, S1) {L2TagArrayWrite} { + ss_sendStaleNotification; + sdv_sendDoneValid; + pr_popResponseQueue; + } + + transition(S_C, NB_AckWB, S) {L2TagArrayWrite} { + ss_sendStaleNotification; + sdv_sendDoneValid; + pr_popResponseQueue; + } + + // Begin Probe Transitions + + transition({Ms, M0, M1, O}, {PrbInvData, PrbInvDataDemand}, I) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pd_sendProbeResponseData; + i2_invL2; + ib_invBothClusters; + pp_popProbeQueue; + } + + transition({Es, E0, E1, S, I}, {PrbInvData, PrbInvDataDemand}, I) {L2TagArrayRead, L2TagArrayWrite} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + i2_invL2; + ib_invBothClusters; + ii_invIcache; // only relevant for S + pp_popProbeQueue; + } + + transition(S_C, {PrbInvData, PrbInvDataDemand}, I_C) {L2TagArrayWrite} { + t_allocateTBE; + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + i2_invL2; + ib_invBothClusters; + ii_invIcache; + pp_popProbeQueue; + } + + transition(I_C, {PrbInvData, PrbInvDataDemand}, I_C) {} { + pi_sendProbeResponseInv; + ib_invBothClusters; + pp_popProbeQueue; + } + + transition({Ms, M0, M1, O, Es, E0, E1, S, I}, PrbInv, I) {L2TagArrayRead, L2TagArrayWrite} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + i2_invL2; // nothing will happen in I + ib_invBothClusters; + ii_invIcache; + pp_popProbeQueue; + } + + transition(S_C, PrbInv, I_C) {L2TagArrayWrite} { + t_allocateTBE; + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + i2_invL2; + ib_invBothClusters; 
+ ii_invIcache; + pp_popProbeQueue; + } + + transition(I_C, PrbInv, I_C) {} { + pi_sendProbeResponseInv; + ib_invBothClusters; + ii_invIcache; + pp_popProbeQueue; + } + + transition({Ms, M0, M1, O}, {PrbShrData, PrbShrDataDemand}, O) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} { + pd_sendProbeResponseData; + pp_popProbeQueue; + } + + transition({Es, E0, E1, S}, {PrbShrData, PrbShrDataDemand}, S) {L2TagArrayRead, L2TagArrayWrite} { + ph_sendProbeResponseHit; + pp_popProbeQueue; + } + + transition(S_C, {PrbShrData, PrbShrDataDemand}) {} { + ph_sendProbeResponseHit; + pp_popProbeQueue; + } + + transition({I, I_C}, {PrbShrData, PrbShrDataDemand}) {L2TagArrayRead} { + pb_sendProbeResponseBackprobe; + pp_popProbeQueue; + } + + transition({I_M0, I_E0S}, {PrbInv, PrbInvData, PrbInvDataDemand}) {} { + pi_sendProbeResponseInv; + ib_invBothClusters; // must invalidate current data (only relevant for I_M0) + a0_allocateL1D; // but make sure there is room for incoming data when it arrives + pp_popProbeQueue; + } + + transition({I_M1, I_E1S}, {PrbInv, PrbInvData, PrbInvDataDemand}) {} { + pi_sendProbeResponseInv; + ib_invBothClusters; // must invalidate current data (only relevant for I_M1) + a1_allocateL1D; // but make sure there is room for incoming data when it arrives + pp_popProbeQueue; + } + + transition({I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_ES}, {PrbInv, PrbInvData, PrbInvDataDemand, PrbShrData, PrbShrDataDemand}) {} { + pi_sendProbeResponseInv; + ib_invBothClusters; + a0_allocateL1D; + a1_allocateL1D; + pp_popProbeQueue; + } + + transition({I_M0, I_E0S, I_M1, I_E1S}, {PrbShrData, PrbShrDataDemand}) {} { + pb_sendProbeResponseBackprobe; + pp_popProbeQueue; + } + + transition(ES_I, {PrbInvData, PrbInvDataDemand}, I_C) {} { + pi_sendProbeResponseInv; + ib_invBothClusters; + ii_invIcache; + pp_popProbeQueue; + } + + transition(MO_I, {PrbInvData, PrbInvDataDemand}, I_C) {} { + pdt_sendProbeResponseDataFromTBE; + ib_invBothClusters; + ii_invIcache; + pp_popProbeQueue; + } 
+ + transition(MO_I, PrbInv, I_C) {} { + pi_sendProbeResponseInv; + ib_invBothClusters; + ii_invIcache; + pp_popProbeQueue; + } + + transition(ES_I, PrbInv, I_C) {} { + pi_sendProbeResponseInv; + ib_invBothClusters; + ii_invIcache; + pp_popProbeQueue; + } + + transition(ES_I, {PrbShrData, PrbShrDataDemand}, ES_I) {} { + ph_sendProbeResponseHit; + s_setSharedFlip; + pp_popProbeQueue; + } + + transition(MO_I, {PrbShrData, PrbShrDataDemand}, MO_I) {} { + pdt_sendProbeResponseDataFromTBE; + s_setSharedFlip; + pp_popProbeQueue; + } + + transition(MO_S0, {PrbInvData, PrbInvDataDemand}, S0_C) {L2TagArrayWrite} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pdt_sendProbeResponseDataFromTBE; + i2_invL2; + a2_allocateL2; + nS_issueRdBlkS; + d_deallocateTBE; + pp_popProbeQueue; + } + + // Cluster-1 twin of the MO_S0 transition above; it lists L2TagArrayWrite like that twin and like both PrbInv variants below + transition(MO_S1, {PrbInvData, PrbInvDataDemand}, S1_C) {L2TagArrayWrite} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pdt_sendProbeResponseDataFromTBE; + i2_invL2; + a2_allocateL2; + nS_issueRdBlkS; + d_deallocateTBE; + pp_popProbeQueue; + } + + transition(MO_S0, PrbInv, S0_C) {L2TagArrayWrite} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + i2_invL2; + a2_allocateL2; + nS_issueRdBlkS; + d_deallocateTBE; + pp_popProbeQueue; + } + + transition(MO_S1, PrbInv, S1_C) {L2TagArrayWrite} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + i2_invL2; + a2_allocateL2; + nS_issueRdBlkS; + d_deallocateTBE; + pp_popProbeQueue; + } + + transition({MO_S0, MO_S1}, {PrbShrData, PrbShrDataDemand}) {} { + pdt_sendProbeResponseDataFromTBE; + s_setSharedFlip; + pp_popProbeQueue; + } + + transition({S_F0, Es_F0, E0_F, E1_Es}, {PrbInvData, PrbInvDataDemand, PrbInv}, IF_E0S) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + // invalidate everything you've got + ib_invBothClusters; + ii_invIcache; + i2_invL2; + // but make sure you have room for what you need from the fill + a0_allocateL1D; + 
a2_allocateL2; + n_issueRdBlk; + pp_popProbeQueue; + } + + transition({S_F1, Es_F1, E1_F, E0_Es}, {PrbInvData, PrbInvDataDemand, PrbInv}, IF_E1S) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + // invalidate everything you've got + ib_invBothClusters; + ii_invIcache; + i2_invL2; + // but make sure you have room for what you need from the fill + a1_allocateL1D; + a2_allocateL2; + n_issueRdBlk; + pp_popProbeQueue; + } + + transition({S_F, Es_F}, {PrbInvData, PrbInvDataDemand, PrbInv}, IF_ES) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + // invalidate everything you've got + ib_invBothClusters; + ii_invIcache; + i2_invL2; + // but make sure you have room for what you need from the fill + a0_allocateL1D; + a1_allocateL1D; + a2_allocateL2; + n_issueRdBlk; + pp_popProbeQueue; + } + + transition(Si_F0, {PrbInvData, PrbInvDataDemand, PrbInv}, F_S0) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + ib_invBothClusters; + ii_invIcache; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + nS_issueRdBlkS; + pp_popProbeQueue; + } + + transition(Si_F1, {PrbInvData, PrbInvDataDemand, PrbInv}, F_S1) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + ib_invBothClusters; + ii_invIcache; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + nS_issueRdBlkS; + pp_popProbeQueue; + } + + transition({Es_F0, E0_F, E1_Es}, {PrbShrData, PrbShrDataDemand}, S_F0) {} { + ph_sendProbeResponseHit; + pp_popProbeQueue; + } + + transition({Es_F1, E1_F, E0_Es}, {PrbShrData, PrbShrDataDemand}, S_F1) {} { + ph_sendProbeResponseHit; + pp_popProbeQueue; + } + + transition(Es_F, {PrbShrData, PrbShrDataDemand}, S_F) {} { + ph_sendProbeResponseHit; + pp_popProbeQueue; + } + + transition({S_F0, S_F1, S_F, Si_F0, Si_F1}, {PrbShrData, PrbShrDataDemand}) {} { + ph_sendProbeResponseHit; + pp_popProbeQueue; + } + + transition(S_M0, {PrbInvData, 
PrbInvDataDemand}, I_M0) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pim_sendProbeResponseInvMs; + ib_invBothClusters; + ii_invIcache; + i2_invL2; + a0_allocateL1D; + a2_allocateL2; + pp_popProbeQueue; + } + + transition(O_M0, {PrbInvData, PrbInvDataDemand}, I_M0) {L2DataArrayRead} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pdm_sendProbeResponseDataMs; + ib_invBothClusters; + ii_invIcache; + i2_invL2; + a0_allocateL1D; + a2_allocateL2; + pp_popProbeQueue; + } + + transition({S_M0, O_M0}, {PrbInv}, I_M0) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pim_sendProbeResponseInvMs; + ib_invBothClusters; + ii_invIcache; + i2_invL2; + a0_allocateL1D; + a2_allocateL2; + pp_popProbeQueue; + } + + transition(S_M1, {PrbInvData, PrbInvDataDemand}, I_M1) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pim_sendProbeResponseInvMs; + ib_invBothClusters; + ii_invIcache; + i2_invL2; + a1_allocateL1D; + a2_allocateL2; + pp_popProbeQueue; + } + + // Cluster-1 twin of the O_M0 transition above; that twin lists L2DataArrayRead for the same pdm_sendProbeResponseDataMs response, so list it here too + transition(O_M1, {PrbInvData, PrbInvDataDemand}, I_M1) {L2DataArrayRead} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pdm_sendProbeResponseDataMs; + ib_invBothClusters; + ii_invIcache; + i2_invL2; + a1_allocateL1D; + a2_allocateL2; + pp_popProbeQueue; + } + + transition({S_M1, O_M1}, {PrbInv}, I_M1) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pim_sendProbeResponseInvMs; + ib_invBothClusters; + ii_invIcache; + i2_invL2; + a1_allocateL1D; + a2_allocateL2; + pp_popProbeQueue; + } + + transition({S0, S0_C}, {PrbInvData, PrbInvDataDemand, PrbInv}) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + ib_invBothClusters; + ii_invIcache; + i2_invL2; + ai_allocateL1I; + a2_allocateL2; + pp_popProbeQueue; + } + + transition({S1, S1_C}, {PrbInvData, PrbInvDataDemand, PrbInv}) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + ib_invBothClusters; + ii_invIcache; + i2_invL2; + ai_allocateL1I; + 
a2_allocateL2; + pp_popProbeQueue; + } + + transition({S_M0, S_M1}, {PrbShrData, PrbShrDataDemand}) {} { + ph_sendProbeResponseHit; + pp_popProbeQueue; + } + + transition({O_M0, O_M1}, {PrbShrData, PrbShrDataDemand}) {L2DataArrayRead} { + pd_sendProbeResponseData; + pp_popProbeQueue; + } + + transition({S0, S1, S0_C, S1_C}, {PrbShrData, PrbShrDataDemand}) {} { + pb_sendProbeResponseBackprobe; + pp_popProbeQueue; + } + + transition({Ms_F0, M0_F, M1_Ms, O_F0}, {PrbInvData, PrbInvDataDemand}, IF_E0S) {L2DataArrayRead} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pd_sendProbeResponseData; + ib_invBothClusters; + i2_invL2; + a0_allocateL1D; + a2_allocateL2; + n_issueRdBlk; + pp_popProbeQueue; + } + + transition({Ms_F1, M1_F, M0_Ms, O_F1}, {PrbInvData, PrbInvDataDemand}, IF_E1S) {L2DataArrayRead} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pd_sendProbeResponseData; + ib_invBothClusters; + i2_invL2; + a1_allocateL1D; + a2_allocateL2; + n_issueRdBlk; + pp_popProbeQueue; + } + + transition({Ms_F, O_F}, {PrbInvData, PrbInvDataDemand}, IF_ES) {L2DataArrayRead} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pd_sendProbeResponseData; + ib_invBothClusters; + i2_invL2; + a0_allocateL1D; + a1_allocateL1D; + a2_allocateL2; + n_issueRdBlk; + pp_popProbeQueue; + } + + transition({Ms_F0, M0_F, M1_Ms, O_F0}, PrbInv, IF_E0S) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + ib_invBothClusters; + i2_invL2; + a0_allocateL1D; + a2_allocateL2; + n_issueRdBlk; + pp_popProbeQueue; + } + + transition({Ms_F1, M1_F, M0_Ms, O_F1}, PrbInv, IF_E1S) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + ib_invBothClusters; + i2_invL2; + a1_allocateL1D; + a2_allocateL2; + n_issueRdBlk; + pp_popProbeQueue; + } + + transition({Ms_F, O_F}, PrbInv, IF_ES) {} { + forward_eviction_to_cpu0; + forward_eviction_to_cpu1; + pi_sendProbeResponseInv; + ib_invBothClusters; + i2_invL2; + 
a0_allocateL1D; + a1_allocateL1D; + a2_allocateL2; + n_issueRdBlk; + pp_popProbeQueue; + } + + transition({Ms_F0, M0_F, M1_Ms}, {PrbShrData, PrbShrDataDemand}, O_F0) {L2DataArrayRead} { + pd_sendProbeResponseData; + pp_popProbeQueue; + } + + // Cluster-1 twin of the transition above: answer the shared probe with data and pop it in this transition + transition({Ms_F1, M1_F, M0_Ms}, {PrbShrData, PrbShrDataDemand}, O_F1) {L2DataArrayRead} { + pd_sendProbeResponseData; + pp_popProbeQueue; + } + + transition({Ms_F}, {PrbShrData, PrbShrDataDemand}, O_F) {L2DataArrayRead} { + pd_sendProbeResponseData; + pp_popProbeQueue; + } + + transition({O_F0, O_F1, O_F}, {PrbShrData, PrbShrDataDemand}) {L2DataArrayRead} { + pd_sendProbeResponseData; + pp_popProbeQueue; + } + + // END TRANSITIONS +} + + diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm b/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm new file mode 100644 index 000000000..52d87fb8b --- /dev/null +++ b/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm @@ -0,0 +1,2038 @@ +/* + * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Lisa Hsu + */ + +machine(MachineType:Directory, "AMD_Base-like protocol") +: DirectoryMemory * directory; + CacheMemory * L3CacheMemory; + Cycles response_latency := 5; + Cycles response_latency_regionDir := 1; + Cycles l3_hit_latency := 30; + bool useL3OnWT := "False"; + Cycles to_memory_controller_latency := 1; + + // From the Cores + MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request"; + MessageBuffer * responseFromCores, network="From", virtual_network="2", vnet_type="response"; + MessageBuffer * unblockFromCores, network="From", virtual_network="4", vnet_type="unblock"; + + // To the Cores + MessageBuffer * probeToCore, network="To", virtual_network="0", vnet_type="request"; + MessageBuffer * responseToCore, network="To", virtual_network="2", vnet_type="response"; + + // From region buffer + MessageBuffer * reqFromRegBuf, network="From", virtual_network="7", vnet_type="request"; + + // To Region directory + MessageBuffer * reqToRegDir, network="To", virtual_network="5", vnet_type="request"; + MessageBuffer * reqFromRegDir, network="From", virtual_network="5", vnet_type="request"; + MessageBuffer * unblockToRegDir, network="To", virtual_network="4", vnet_type="unblock"; + + MessageBuffer * triggerQueue; + MessageBuffer * L3triggerQueue; + MessageBuffer * responseFromMemory; +{ + // STATES + state_declaration(State, desc="Directory states", default="Directory_State_U") { + U, 
AccessPermission:Backing_Store, desc="unblocked"; + BR, AccessPermission:Backing_Store, desc="got CPU read request, blocked while sent to L3"; + BW, AccessPermission:Backing_Store, desc="got CPU write request, blocked while sent to L3"; + BL, AccessPermission:Busy, desc="got L3 WB request"; + // BL is Busy because it's possible for the data only to be in the network + // in the WB, L3 has sent it and gone on with its business in possibly I + // state. + BI, AccessPermission:Backing_Store, desc="Blocked waiting for inv ack from core"; + BS_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; + BM_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; + B_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; + BP, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory"; + BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; + BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; + B_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; + BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + B, AccessPermission:Backing_Store, desc="sent response, Blocked til ack"; + + // These are needed for when a private requests was issued before an inv was received + // for writebacks + BS_Pm_BL, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + BM_Pm_BL, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + B_Pm_BL, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + BP_BL, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory"; + // for reads + 
BS_Pm_B, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + BM_Pm_B, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + B_Pm_B, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + BP_B, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory"; + } + + // Events + enumeration(Event, desc="Directory events") { + // CPU requests + RdBlkS, desc="..."; + RdBlkM, desc="..."; + RdBlk, desc="..."; + WriteThrough, desc="WriteThrough Message"; + Atomic, desc="Atomic Message"; + + RdBlkSP, desc="..."; + RdBlkMP, desc="..."; + RdBlkP, desc="..."; + VicDirtyP, desc="..."; + VicCleanP, desc="..."; + WriteThroughP, desc="WriteThrough Message"; + AtomicP, desc="Atomic Message"; + + // writebacks + VicDirty, desc="..."; + VicClean, desc="..."; + CPUData, desc="WB data from CPU"; + StaleWB, desc="WB response for a no longer valid request"; + + // probe responses + CPUPrbResp, desc="Probe Response Msg"; + LastCPUPrbResp, desc="Last Probe Response Msg"; + + ProbeAcksComplete, desc="Probe Acks Complete"; + + L3Hit, desc="Hit in L3 return data to core"; + + // Memory Controller + MemData, desc="Fetched data from memory arrives"; + WBAck, desc="Writeback Ack from memory arrives"; + + CoreUnblock, desc="Core received data, unblock"; + UnblockWriteThrough, desc="unblock, self triggered"; + + StaleVicDirty, desc="Core invalidated before VicDirty processed"; + StaleVicDirtyP, desc="Core invalidated before VicDirty processed"; + + // For region protocol + CPUReq, desc="Generic CPU request"; + Inv, desc="Region dir needs a block invalidated"; + Downgrade, desc="Region dir needs a block downgraded"; + + // For private accesses (bypassed reg-dir) + CPUReadP, desc="Initial req from core, sent to L3"; + CPUWriteP, desc="Initial req from core, sent to L3"; + } + + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { + 
L3DataArrayRead, desc="Read the data array"; + L3DataArrayWrite, desc="Write the data array"; + L3TagArrayRead, desc="Read the tag array"; + L3TagArrayWrite, desc="Write the tag array"; + } + + // TYPES + + // DirectoryEntry + structure(Entry, desc="...", interface="AbstractEntry") { + State DirectoryState, desc="Directory state"; + DataBlock DataBlk, desc="data for the block"; + NetDest VicDirtyIgnore, desc="VicDirty coming from whom to ignore"; + } + + // Entry type the in_ports obtain from L3CacheMemory.lookup() + structure(CacheEntry, desc="...", interface="AbstractCacheEntry") { + DataBlock DataBlk, desc="data for the block"; + MachineID LastSender, desc="Mach which this block came from"; + } + + // Per-address transient bookkeeping held in the TBETable declared below (TBEs) + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block"; + DataBlock DataBlkAux, desc="Auxiliary data for the block"; + bool Dirty, desc="Is the data dirty?"; + int NumPendingAcks, desc="num acks expected"; + MachineID OriginalRequestor, desc="Original Requestor"; + MachineID WTRequestor, desc="WT Requestor"; + bool Cached, desc="data hit in Cache"; + bool MemData, desc="Got MemData?",default="false"; + bool wtData, desc="Got write through data?",default="false"; + bool atomicData, desc="Got Atomic op?",default="false"; + Cycles InitialRequestTime, desc="..."; + Cycles ForwardRequestTime, desc="..."; + Cycles ProbeRequestStartTime, desc="..."; + bool DemandRequest, desc="for profiling"; + MachineID LastSender, desc="Mach which this block came from"; + bool L3Hit, default="false", desc="Was this an L3 hit?"; + bool TriggeredAcksComplete, default="false", desc="True if already triggered acks complete"; + WriteMask writeMask, desc="outstanding write through mask"; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs"; + + Tick clockEdge(); + Tick cyclesToTicks(Cycles c); + + void set_tbe(TBE a); + void 
unset_tbe(); + void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + Cycles curCycle(); + + Entry getDirectoryEntry(Addr addr), return_by_pointer="yes" { + Entry dir_entry := static_cast(Entry, "pointer", directory.lookup(addr)); + + if (is_valid(dir_entry)) { + //DPRINTF(RubySlicc, "Getting entry %s: %s\n", addr, dir_entry.DataBlk); + return dir_entry; + } + + dir_entry := static_cast(Entry, "pointer", + directory.allocate(addr, new Entry)); + return dir_entry; + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + TBE tbe := TBEs.lookup(addr); + if (is_valid(tbe) && tbe.MemData) { + DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe); + return tbe.DataBlk; + } + DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr)); + return getDirectoryEntry(addr).DataBlk; + } + + State getState(TBE tbe, CacheEntry entry, Addr addr) { + return getDirectoryEntry(addr).DirectoryState; + } + + State getStateFromAddr(Addr addr) { + return getDirectoryEntry(addr).DirectoryState; + } + + void setState(TBE tbe, CacheEntry entry, Addr addr, State state) { + getDirectoryEntry(addr).DirectoryState := state; + } + + AccessPermission getAccessPermission(Addr addr) { + // For this Directory, all permissions are just tracked in Directory, since + // it's not possible to have something in TBE but not Dir, just keep track + // of state all in one place. 
+ if(directory.isPresent(addr)) { + return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState); + } + + return AccessPermission:NotPresent; + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + void setAccessPermission(CacheEntry entry, Addr addr, State state) { + getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state)); + } + + void recordRequestType(RequestType request_type, Addr addr) { + if (request_type == RequestType:L3DataArrayRead) { + L3CacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:L3DataArrayWrite) { + L3CacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:L3TagArrayRead) { + L3CacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:L3TagArrayWrite) { + L3CacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:L3DataArrayRead) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L3DataArrayWrite) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L3TagArrayRead) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == 
RequestType:L3TagArrayWrite) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } + } + + // ** OUT_PORTS ** + out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore); + out_port(responseNetwork_out, ResponseMsg, responseToCore); + + out_port(requestNetworkReg_out, CPURequestMsg, reqToRegDir); + out_port(regAckNetwork_out, UnblockMsg, unblockToRegDir); + + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + out_port(L3TriggerQueue_out, TriggerMsg, L3triggerQueue); + + // ** IN_PORTS ** + + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=7) { + if (triggerQueue_in.isReady(clockEdge())) { + peek(triggerQueue_in, TriggerMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == TriggerType:AcksComplete) { + trigger(Event:ProbeAcksComplete, in_msg.addr, entry, tbe); + } else if (in_msg.Type == TriggerType:UnblockWriteThrough) { + trigger(Event:UnblockWriteThrough, in_msg.addr, entry, tbe); + } else { + error("Unknown trigger msg"); + } + } + } + } + + in_port(L3TriggerQueue_in, TriggerMsg, L3triggerQueue, rank=6) { + if (L3TriggerQueue_in.isReady(clockEdge())) { + peek(L3TriggerQueue_in, TriggerMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == TriggerType:L3Hit) { + trigger(Event:L3Hit, in_msg.addr, entry, tbe); + } else { + error("Unknown trigger msg"); + } + } + } + } + + // Unblock Network + in_port(unblockNetwork_in, UnblockMsg, unblockFromCores, rank=5) { + if (unblockNetwork_in.isReady(clockEdge())) { + peek(unblockNetwork_in, UnblockMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + trigger(Event:CoreUnblock, 
in_msg.addr, entry, tbe); + } + } + } + + // Core response network + in_port(responseNetwork_in, ResponseMsg, responseFromCores, rank=4) { + if (responseNetwork_in.isReady(clockEdge())) { + peek(responseNetwork_in, ResponseMsg) { + DPRINTF(RubySlicc, "core responses %s\n", in_msg); + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == CoherenceResponseType:CPUPrbResp) { + if (is_valid(tbe) && tbe.NumPendingAcks == 1 + && tbe.TriggeredAcksComplete == false) { + trigger(Event:LastCPUPrbResp, in_msg.addr, entry, tbe); + } else { + trigger(Event:CPUPrbResp, in_msg.addr, entry, tbe); + } + } else if (in_msg.Type == CoherenceResponseType:CPUData) { + trigger(Event:CPUData, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceResponseType:StaleNotif) { + trigger(Event:StaleWB, in_msg.addr, entry, tbe); + } else { + error("Unexpected response type"); + } + } + } + } + + // off-chip memory request/response is done + in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=3) { + if (memQueue_in.isReady(clockEdge())) { + peek(memQueue_in, MemoryMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == MemoryRequestType:MEMORY_READ) { + trigger(Event:MemData, in_msg.addr, entry, tbe); + DPRINTF(RubySlicc, "%s\n", in_msg); + } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) { + trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them. 
+ } else { + DPRINTF(RubySlicc, "%s\n", in_msg.Type); + error("Invalid message"); + } + } + } + } + + in_port(regBuf_in, CPURequestMsg, reqFromRegBuf, rank=2) { + if (regBuf_in.isReady(clockEdge())) { + peek(regBuf_in, CPURequestMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == CoherenceRequestType:ForceInv) { + trigger(Event:Inv, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:ForceDowngrade) { + trigger(Event:Downgrade, in_msg.addr, entry, tbe); + } else { + error("Bad request from region buffer"); + } + } + } + } + + in_port(regDir_in, CPURequestMsg, reqFromRegDir, rank=1) { + if (regDir_in.isReady(clockEdge())) { + peek(regDir_in, CPURequestMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == CoherenceRequestType:RdBlk) { + trigger(Event:RdBlk, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkS) { + trigger(Event:RdBlkS, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { + trigger(Event:RdBlkM, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:Atomic) { + trigger(Event:Atomic, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:WriteThrough) { + trigger(Event:WriteThrough, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:VicDirty) { + if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { + DPRINTF(RubySlicc, "Dropping VicDirty for address %s\n", in_msg.addr); + trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe); + } else { + trigger(Event:VicDirty, in_msg.addr, entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:VicClean) { + if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { + DPRINTF(RubySlicc, "Dropping 
VicClean for address %s\n", in_msg.addr); + trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe); + } else { + trigger(Event:VicClean, in_msg.addr, entry, tbe); + } + } else { + error("Bad message type fwded from Region Dir"); + } + } + } + } + + in_port(requestNetwork_in, CPURequestMsg, requestFromCores, rank=0) { + if (requestNetwork_in.isReady(clockEdge())) { + peek(requestNetwork_in, CPURequestMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Private) { + // Bypass the region dir + if (in_msg.Type == CoherenceRequestType:RdBlk) { + trigger(Event:RdBlkP, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkS) { + trigger(Event:RdBlkSP, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { + trigger(Event:RdBlkMP, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:Atomic) { + trigger(Event:AtomicP, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:WriteThrough) { + trigger(Event:WriteThroughP, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:VicDirty) { + if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { + DPRINTF(RubySlicc, "Dropping VicDirtyP for address %s\n", in_msg.addr); + trigger(Event:StaleVicDirtyP, in_msg.addr, entry, tbe); + } else { + DPRINTF(RubySlicc, "Got VicDirty from %s on %s\n", in_msg.Requestor, in_msg.addr); + trigger(Event:VicDirtyP, in_msg.addr, entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:VicClean) { + if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { + DPRINTF(RubySlicc, "Dropping VicCleanP for address %s\n", in_msg.addr); + trigger(Event:StaleVicDirtyP, in_msg.addr, entry, tbe); + } else { + DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr); + trigger(Event:VicCleanP, in_msg.addr, entry, tbe); 
+ } + } else { + error("Bad message type for private access"); + } + } else { + trigger(Event:CPUReq, in_msg.addr, entry, tbe); + } + } + } + } + + // Actions + action(s_sendResponseS, "s", desc="send Shared response") { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := false; + out_msg.State := CoherenceState:Shared; + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := tbe.ForwardRequestTime; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + out_msg.DemandRequest := tbe.DemandRequest; + out_msg.L3Hit := tbe.L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(es_sendResponseES, "es", desc="send Exclusive or Shared response") { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := tbe.Dirty; + if (tbe.Cached) { + out_msg.State := CoherenceState:Shared; + } else { + out_msg.State := CoherenceState:Exclusive; + } + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := tbe.ForwardRequestTime; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + out_msg.DemandRequest := tbe.DemandRequest; + out_msg.L3Hit := tbe.L3Hit; + 
DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(m_sendResponseM, "m", desc="send Modified response") { + if (tbe.wtData) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:UnblockWriteThrough; + } + } else { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := tbe.Dirty; + out_msg.State := CoherenceState:Modified; + out_msg.CtoD := false; + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := tbe.ForwardRequestTime; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + out_msg.DemandRequest := tbe.DemandRequest; + out_msg.L3Hit := tbe.L3Hit; + if (tbe.atomicData) { + out_msg.WTRequestor := tbe.WTRequestor; + } + DPRINTF(RubySlicc, "%s\n", out_msg); + } + if (tbe.atomicData) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:UnblockWriteThrough; + } + } + } + } + + action(sb_sendResponseSBypass, "sb", desc="send Shared response") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := false; + out_msg.State := CoherenceState:Shared; + out_msg.InitialRequestTime := 
in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + out_msg.DemandRequest := false; + out_msg.L3Hit := tbe.L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(esb_sendResponseESBypass, "esb", desc="send Exclusive or Shared response") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := tbe.Dirty; + if (tbe.Cached || in_msg.ForceShared) { + out_msg.State := CoherenceState:Shared; + } else { + out_msg.State := CoherenceState:Exclusive; + } + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + out_msg.DemandRequest := false; + out_msg.L3Hit := tbe.L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(mbwt_sendResponseWriteThroughBypass, "mbwt", desc="send write through response") { + peek(requestNetwork_in, CPURequestMsg) { + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysWBAck; + out_msg.Destination.add(in_msg.Requestor); + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime 
:= in_msg.ProbeRequestStartTime; + out_msg.DemandRequest := false; + } + } else { + assert(in_msg.Type == CoherenceRequestType:Atomic); + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := getDirectoryEntry(address).DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := in_msg.Dirty; + out_msg.State := CoherenceState:Modified; + out_msg.CtoD := false; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + out_msg.DemandRequest := false; + out_msg.L3Hit := tbe.L3Hit; + out_msg.WTRequestor := in_msg.WTRequestor; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:UnblockWriteThrough; + } + } + } + + action(mb_sendResponseMBypass, "mb", desc="send Modified response") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := tbe.Dirty; + out_msg.State := CoherenceState:Modified; + out_msg.CtoD := false; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime; + 
out_msg.OriginalResponder := tbe.LastSender; + out_msg.DemandRequest := false; + out_msg.L3Hit := tbe.L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(c_sendResponseCtoD, "c", desc="send CtoD Ack") { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + out_msg.Sender := machineID; + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.Dirty := false; + out_msg.State := CoherenceState:Modified; + out_msg.CtoD := true; + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.DemandRequest := tbe.DemandRequest; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(cp_sendResponseCtoDP, "cp", desc="send CtoD Ack") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.Dirty := false; + out_msg.State := CoherenceState:Modified; + out_msg.CtoD := true; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime; + out_msg.DemandRequest := false; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(w_sendResponseWBAck, "w", desc="send WB Ack") { + peek(regDir_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysWBAck; + out_msg.Destination.add(in_msg.Requestor); + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; 
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := in_msg.ForwardRequestTime; + out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime; + out_msg.DemandRequest := false; + } + } + } + + action(wp_sendResponseWBAckP, "wp", desc="send WB Ack") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysWBAck; + out_msg.Destination.add(in_msg.Requestor); + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime; + out_msg.DemandRequest := false; + } + } + } + + action(wc_sendResponseWBAck, "wc", desc="send WB Ack for cancel") { + peek(responseNetwork_in, ResponseMsg) { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysWBAck; + out_msg.Destination.add(in_msg.Sender); + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(ra_ackRegionDir, "ra", desc="Ack region dir") { + peek(regDir_in, CPURequestMsg) { + if (in_msg.NoAckNeeded == false) { + enqueue(responseNetwork_out, ResponseMsg, response_latency_regionDir) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DirReadyAck; + out_msg.Destination.add(map_Address_to_RegionDir(address)); + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + } + + action(l_queueMemRdReq, "lr", desc="Read data from memory") { + peek(regDir_in, CPURequestMsg) { + if (L3CacheMemory.isTagPresent(address)) { + enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L3Hit; + 
DPRINTF(RubySlicc, "%s\n", out_msg); + } + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + tbe.DataBlk := entry.DataBlk; + tbe.LastSender := entry.LastSender; + tbe.L3Hit := true; + tbe.MemData := true; + DPRINTF(RubySlicc, "L3 data is %s\n", entry.DataBlk); + L3CacheMemory.deallocate(address); + } else { + queueMemoryRead(machineID, address, to_memory_controller_latency); + } + } + } + + action(lrp_queueMemRdReqP, "lrp", desc="Read data from memory") { + peek(requestNetwork_in, CPURequestMsg) { + if (L3CacheMemory.isTagPresent(address)) { + enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + tbe.DataBlk := entry.DataBlk; + tbe.LastSender := entry.LastSender; + tbe.L3Hit := true; + tbe.MemData := true; + DPRINTF(RubySlicc, "L3 data is %s\n", entry.DataBlk); + L3CacheMemory.deallocate(address); + } else { + queueMemoryRead(machineID, address, to_memory_controller_latency); + } + } + } + + action(dcr_probeInvCoreData, "dcr", desc="probe inv cores, return data") { + peek(regBuf_in, CPURequestMsg) { + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination := in_msg.Sharers; + tbe.NumPendingAcks := tbe.NumPendingAcks + in_msg.Sharers.count(); + DPRINTF(RubySlicc, "%s\n", out_msg); + APPEND_TRANSITION_COMMENT(" dcr: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + + // Downgrade counterpart of dcr_probeInvCoreData: probes the same in_msg.Sharers set with PrbDowngrade. + action(ddr_probeDownCoreData, "ddr", desc="probe downgrade cores, return data") { + peek(regBuf_in, CPURequestMsg) { + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + 
out_msg.Type := ProbeRequestType:PrbDowngrade; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination := in_msg.Sharers; + tbe.NumPendingAcks := tbe.NumPendingAcks + in_msg.Sharers.count(); + DPRINTF(RubySlicc, "%s\n", out_msg); + APPEND_TRANSITION_COMMENT(" ddr: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + + action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") { + peek(requestNetwork_in, CPURequestMsg) { // not the right network? + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbDowngrade; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + tbe.NumPendingAcks := tbe.NumPendingAcks +machineCount(MachineType:CorePair) - 1; + out_msg.Destination.broadcast(MachineType:TCP); + tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:TCP); + out_msg.Destination.broadcast(MachineType:SQC); + tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:SQC); + out_msg.Destination.remove(in_msg.Requestor); + DPRINTF(RubySlicc, "%s\n", (out_msg)); + APPEND_TRANSITION_COMMENT(" sc: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + + action(ic_probeInvCore, "ic", desc="probe invalidate core, no return data needed") { + peek(requestNetwork_in, CPURequestMsg) { // not the right network? 
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := false; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + tbe.NumPendingAcks := tbe.NumPendingAcks +machineCount(MachineType:CorePair) - 1; + out_msg.Destination.broadcast(MachineType:TCP); + tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:TCP); + out_msg.Destination.broadcast(MachineType:SQC); + tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:SQC); + out_msg.Destination.remove(in_msg.Requestor); + APPEND_TRANSITION_COMMENT(" ic: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + DPRINTF(RubySlicc, "%s\n", out_msg); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + + action(d_writeDataToMemory, "d", desc="Write data to memory") { + peek(responseNetwork_in, ResponseMsg) { + getDirectoryEntry(address).DataBlk := in_msg.DataBlk; + DPRINTF(RubySlicc, "Writing Data: %s to address %s\n", in_msg.DataBlk, + in_msg.addr); + } + } + + action(t_allocateTBE, "t", desc="allocate TBE Entry") { + check_allocate(TBEs); + peek(regDir_in, CPURequestMsg) { + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + tbe.writeMask.clear(); + tbe.writeMask.orMask(in_msg.writeMask); + tbe.wtData := true; + tbe.WTRequestor := in_msg.WTRequestor; + tbe.LastSender := in_msg.Requestor; + } + if (in_msg.Type == CoherenceRequestType:Atomic) { + tbe.writeMask.clear(); + tbe.writeMask.orMask(in_msg.writeMask); + tbe.atomicData := true; + tbe.WTRequestor := in_msg.WTRequestor; + tbe.LastSender := in_msg.Requestor; + } + tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs + tbe.Dirty := false; + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + 
tbe.DataBlk.copyPartial(in_msg.DataBlk,tbe.writeMask); + tbe.Dirty := false; + } + tbe.OriginalRequestor := in_msg.Requestor; + tbe.NumPendingAcks := 0; + tbe.Cached := in_msg.ForceShared; + tbe.InitialRequestTime := in_msg.InitialRequestTime; + tbe.ForwardRequestTime := curCycle(); + tbe.ProbeRequestStartTime := in_msg.ProbeRequestStartTime; + tbe.DemandRequest := in_msg.DemandRequest; + } + } + + action(tp_allocateTBEP, "tp", desc="allocate TBE Entry") { + check_allocate(TBEs); + peek(requestNetwork_in, CPURequestMsg) { + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + tbe.writeMask.clear(); + tbe.writeMask.orMask(in_msg.writeMask); + tbe.wtData := true; + tbe.WTRequestor := in_msg.WTRequestor; + tbe.LastSender := in_msg.Requestor; + } + if (in_msg.Type == CoherenceRequestType:Atomic) { + tbe.writeMask.clear(); + tbe.writeMask.orMask(in_msg.writeMask); + tbe.atomicData := true; + tbe.WTRequestor := in_msg.WTRequestor; + tbe.LastSender := in_msg.Requestor; + } + tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs + tbe.Dirty := false; + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + tbe.DataBlk.copyPartial(in_msg.DataBlk,tbe.writeMask); + tbe.Dirty := false; + } + tbe.OriginalRequestor := in_msg.Requestor; + tbe.NumPendingAcks := 0; + tbe.Cached := in_msg.ForceShared; + tbe.InitialRequestTime := in_msg.InitialRequestTime; + tbe.ForwardRequestTime := curCycle(); + tbe.ProbeRequestStartTime := in_msg.ProbeRequestStartTime; + tbe.DemandRequest := false; + } + } + + action(sa_setAcks, "sa", desc="setAcks") { + peek(regDir_in, CPURequestMsg) { + tbe.NumPendingAcks := in_msg.Acks; + APPEND_TRANSITION_COMMENT(" waiting for acks "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + } + } + + action(tr_allocateTBE, "tr", desc="allocate TBE Entry for Region inv") { + check_allocate(TBEs); + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + tbe.NumPendingAcks := 0; + 
} + + action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(wdp_writeBackDataPrivate, "wdp", desc="Write back data if needed") { + peek(requestNetwork_in, CPURequestMsg) { + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + tbe.DataBlkAux := getDirectoryEntry(address).DataBlk; + tbe.DataBlkAux.copyPartial(in_msg.DataBlk,in_msg.writeMask); + getDirectoryEntry(address).DataBlk := tbe.DataBlkAux; + } else{ + assert(in_msg.Type == CoherenceRequestType:Atomic); + tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask); + getDirectoryEntry(address).DataBlk := tbe.DataBlkAux; + } + } + } + + action(wd_writeBackData, "wd", desc="Write back data if needed") { + if (tbe.wtData) { + DataBlock tmp := getDirectoryEntry(address).DataBlk; + tmp.copyPartial(tbe.DataBlk,tbe.writeMask); + tbe.DataBlk := tmp; + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + } else if (tbe.atomicData) { + tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask); + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + } else if (tbe.Dirty == true) { + APPEND_TRANSITION_COMMENT(" Wrote data back "); + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + } + } + + action(wdi_writeBackDataInv, "wdi", desc="Write back inv data if needed") { + // Kind of opposite from above...? + if (tbe.Dirty == true) { + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + APPEND_TRANSITION_COMMENT("Writing dirty data to dir"); + DPRINTF(RubySlicc, "Data %s: %s\n", address, tbe.DataBlk); + } else { + APPEND_TRANSITION_COMMENT("NOT!!! Writing dirty data to dir"); + } + } + + action(wdt_writeBackDataInvNoTBE, "wdt", desc="Write back inv data if needed no TBE") { + // Kind of opposite from above...? 
+ peek(responseNetwork_in, ResponseMsg) { + if (in_msg.Dirty == true) { + getDirectoryEntry(address).DataBlk := in_msg.DataBlk; + APPEND_TRANSITION_COMMENT("Writing dirty data to dir"); + DPRINTF(RubySlicc, "Data %s: %s\n", address, in_msg.DataBlk); + } else { + APPEND_TRANSITION_COMMENT("NOT!!! Writing dirty data to dir"); + } + } + } + + action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") { + peek(memQueue_in, MemoryMsg) { + if (tbe.Dirty == false) { + tbe.DataBlk := getDirectoryEntry(address).DataBlk; + } + tbe.MemData := true; + } + } + + action(ml_writeL3DataToTBE, "ml", desc="write L3 data to TBE") { + assert(tbe.Dirty == false); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + tbe.DataBlk := entry.DataBlk; + tbe.LastSender := entry.LastSender; + tbe.L3Hit := true; + tbe.MemData := true; + } + + action(y_writeProbeDataToTBE, "y", desc="write Probe Data to TBE") { + peek(responseNetwork_in, ResponseMsg) { + if (in_msg.Dirty) { + DPRINTF(RubySlicc, "Got dirty data for %s from %s\n", address, in_msg.Sender); + DPRINTF(RubySlicc, "Data is %s\n", in_msg.DataBlk); + if (tbe.wtData) { + DataBlock tmp := in_msg.DataBlk; + tmp.copyPartial(tbe.DataBlk,tbe.writeMask); + tbe.DataBlk := tmp; + } else if (tbe.Dirty) { + if(tbe.atomicData == false && tbe.wtData == false) { + DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender); + assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data + } + } else { + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty; + tbe.LastSender := in_msg.Sender; + } + } + if (in_msg.Hit) { + tbe.Cached := true; + } + } + } + + action(yc_writeCPUDataToTBE, "yc", desc="write CPU Data to TBE") { + peek(responseNetwork_in, ResponseMsg) { + if (in_msg.Dirty) { + DPRINTF(RubySlicc, "Got dirty data for %s from %s\n", address, in_msg.Sender); + DPRINTF(RubySlicc, "Data is %s\n", in_msg.DataBlk); + if (tbe.Dirty) { + DPRINTF(RubySlicc, "Got double data for %s 
from %s\n", address, in_msg.Sender); + assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data + } + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := false; + tbe.LastSender := in_msg.Sender; + } + } + } + + action(x_decrementAcks, "x", desc="decrement Acks pending") { + if (tbe.NumPendingAcks > 0) { + tbe.NumPendingAcks := tbe.NumPendingAcks - 1; + } else { + APPEND_TRANSITION_COMMENT(" Double ack! "); + } + assert(tbe.NumPendingAcks >= 0); + APPEND_TRANSITION_COMMENT(" Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + } + + action(o_checkForCompletion, "o", desc="check for ack completion") { + if (tbe.NumPendingAcks == 0 && tbe.TriggeredAcksComplete == false) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + tbe.TriggeredAcksComplete := true; + } + APPEND_TRANSITION_COMMENT(" Check: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + } + + action(ont_checkForCompletionNoTrigger, "ont", desc="check for ack completion, no trigger") { + if (tbe.NumPendingAcks == 0 && tbe.TriggeredAcksComplete == false) { + tbe.TriggeredAcksComplete := true; + } + APPEND_TRANSITION_COMMENT(" Check: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + } + + action(rvp_removeVicDirtyIgnore, "rvp", desc="Remove ignored core") { + peek(requestNetwork_in, CPURequestMsg) { + getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor); + } + } + + action(rv_removeVicDirtyIgnore, "rv", desc="Remove ignored core") { + peek(regDir_in, CPURequestMsg) { + getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor); + } + } + + action(r_sendRequestToRegionDir, "r", desc="send request to Region Directory") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(requestNetworkReg_out, CPURequestMsg, 1) { + out_msg.addr := address; + out_msg.Type := in_msg.Type; + out_msg.Requestor := in_msg.Requestor; + 
out_msg.Destination.add(map_Address_to_RegionDir(address)); + out_msg.Shared := in_msg.Shared; + out_msg.MessageSize := in_msg.MessageSize; + DPRINTF(RubySlicc, "out dest: %s\n", map_Address_to_RegionDir(address)); + } + } + } + + action(ai_ackInvalidate, "ai", desc="Ack to let the reg-dir know that the inv is ordered") { + peek(regBuf_in, CPURequestMsg) { + enqueue(regAckNetwork_out, UnblockMsg, 1) { + out_msg.addr := address; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Response_Control; + DPRINTF(RubySlicc, "ai out_msg: %s\n", out_msg); + } + } + } + + action(aic_ackInvalidate, "aic", desc="Ack to let the reg-dir know that the inv is ordered") { + peek(responseNetwork_in, ResponseMsg) { + if (in_msg.NoAckNeeded == false) { + enqueue(regAckNetwork_out, UnblockMsg, 1) { + out_msg.addr := address; + if (machineIDToMachineType(in_msg.Sender) == MachineType:CorePair) { + out_msg.Destination.add(createMachineID(MachineType:RegionBuffer, intToID(0))); + } else { + out_msg.Destination.add(createMachineID(MachineType:RegionBuffer, intToID(1))); + } + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.wasValid := in_msg.isValid; + // Trace only after every field is assigned, under this action's own short name. + DPRINTF(RubySlicc, "aic out_msg: %s\n", out_msg); + } + } + } + } + + action(al_allocateL3Block, "al", desc="allocate the L3 block on WB") { + peek(responseNetwork_in, ResponseMsg) { + if (L3CacheMemory.isTagPresent(address)) { + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) "); + entry.DataBlk := in_msg.DataBlk; + entry.LastSender := in_msg.Sender; + } else { + if (L3CacheMemory.cacheAvail(address) == false) { + Addr victim := L3CacheMemory.cacheProbe(address); + CacheEntry victim_entry := static_cast(CacheEntry, "pointer", + L3CacheMemory.lookup(victim)); + queueMemoryWrite(machineID, victim, to_memory_controller_latency, + victim_entry.DataBlk); + L3CacheMemory.deallocate(victim); + } + 
assert(L3CacheMemory.cacheAvail(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 "); + entry.DataBlk := in_msg.DataBlk; + entry.LastSender := in_msg.Sender; + } + } + } + + action(alwt_allocateL3BlockOnWT, "alwt", desc="allocate the L3 block on WT") { + if ((tbe.wtData || tbe.atomicData) && useL3OnWT) { + if (L3CacheMemory.isTagPresent(address)) { + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) "); + entry.DataBlk := tbe.DataBlk; + entry.LastSender := tbe.LastSender; + } else { + if (L3CacheMemory.cacheAvail(address) == false) { + Addr victim := L3CacheMemory.cacheProbe(address); + CacheEntry victim_entry := static_cast(CacheEntry, "pointer", + L3CacheMemory.lookup(victim)); + queueMemoryWrite(machineID, victim, to_memory_controller_latency, + victim_entry.DataBlk); + L3CacheMemory.deallocate(victim); + } + assert(L3CacheMemory.cacheAvail(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 "); + entry.DataBlk := tbe.DataBlk; + entry.LastSender := tbe.LastSender; + } + } + } + + action(ali_allocateL3Block, "ali", desc="allocate the L3 block on ForceInv") { + if (tbe.Dirty == true) { + if (L3CacheMemory.isTagPresent(address)) { + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) "); + entry.DataBlk := tbe.DataBlk; + entry.LastSender := tbe.LastSender; + } else { + if (L3CacheMemory.cacheAvail(address) == false) { + Addr victim := L3CacheMemory.cacheProbe(address); + CacheEntry victim_entry := static_cast(CacheEntry, "pointer", + L3CacheMemory.lookup(victim)); + queueMemoryWrite(machineID, victim, to_memory_controller_latency, + victim_entry.DataBlk); 
+ L3CacheMemory.deallocate(victim); + } + assert(L3CacheMemory.cacheAvail(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 "); + entry.DataBlk := tbe.DataBlk; + entry.LastSender := tbe.LastSender; + } + } + } + + action(ali_allocateL3BlockNoTBE, "alt", desc="allocate the L3 block on ForceInv no TBE") { + peek(responseNetwork_in, ResponseMsg) { + if (in_msg.Dirty) { + if (L3CacheMemory.isTagPresent(address)) { + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + APPEND_TRANSITION_COMMENT(" ali wrote data to L3 (hit) "); + entry.DataBlk := in_msg.DataBlk; + entry.LastSender := in_msg.Sender; + } else { + if (L3CacheMemory.cacheAvail(address) == false) { + Addr victim := L3CacheMemory.cacheProbe(address); + CacheEntry victim_entry := static_cast(CacheEntry, "pointer", + L3CacheMemory.lookup(victim)); + queueMemoryWrite(machineID, victim, to_memory_controller_latency, + victim_entry.DataBlk); + L3CacheMemory.deallocate(victim); + } + assert(L3CacheMemory.cacheAvail(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry)); + APPEND_TRANSITION_COMMENT(" ali wrote data to L3 "); + entry.DataBlk := in_msg.DataBlk; + entry.LastSender := in_msg.Sender; + } + } + } + } + + action(dl_deallocateL3, "dl", desc="deallocate the L3 block") { + L3CacheMemory.deallocate(address); + } + + action(p_popRequestQueue, "p", desc="pop request queue") { + requestNetwork_in.dequeue(clockEdge()); + } + + action(prd_popRegionQueue, "prd", desc="pop request queue") { + regDir_in.dequeue(clockEdge()); + } + + action(prb_popRegionBufQueue, "prb", desc="pop request queue") { + regBuf_in.dequeue(clockEdge()); + } + + action(pr_popResponseQueue, "pr", desc="pop response queue") { + responseNetwork_in.dequeue(clockEdge()); + } + + action(pm_popMemQueue, "pm", desc="pop mem queue") { + 
memQueue_in.dequeue(clockEdge()); + } + + action(pt_popTriggerQueue, "pt", desc="pop trigger queue") { + triggerQueue_in.dequeue(clockEdge()); + } + + action(ptl_popTriggerQueue, "ptl", desc="pop L3 trigger queue") { + L3TriggerQueue_in.dequeue(clockEdge()); + } + + action(pu_popUnblockQueue, "pu", desc="pop unblock queue") { + unblockNetwork_in.dequeue(clockEdge()); + } + + action(yy_recycleResponseQueue, "yy", desc="recycle response queue") { + responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + action(ww_stallAndWaitRegRequestQueue, "ww", desc="recycle region dir request queue") { + stall_and_wait(regDir_in, address); + } + + action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") { + stall_and_wait(requestNetwork_in, address); + } + + action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") { + wakeUpBuffers(address); + } + + action(wa_wakeUpAllDependents, "waa", desc="Wake up any requests waiting for this region") { + wakeUpAllBuffers(); + } + + action(z_stall, "z", desc="...") { + } + + // TRANSITIONS + + // transitions from U + + // NOTE(review): in state U, Inv/Downgrade are popped from regBuf_in (prb_popRegionBufQueue), yet the stall below uses ww_stallAndWaitRegRequestQueue, which waits on regDir_in -- confirm the intended port. + transition({BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {Inv, Downgrade}) { + ww_stallAndWaitRegRequestQueue; + } + + transition(U, Inv, BI){L3TagArrayRead} { + tr_allocateTBE; + dcr_probeInvCoreData; // only need to invalidate sharers + ai_ackInvalidate; + prb_popRegionBufQueue; + } + + transition(U, Downgrade, BI){L3TagArrayRead} { + tr_allocateTBE; + ddr_probeDownCoreData; // only need to downgrade sharers + ai_ackInvalidate; + prb_popRegionBufQueue; + } + + // The next 2 transitions are needed in the event that an invalidation + // is waiting for its ack from the core, but the event makes it through + // the region directory before the acks. 
This wouldn't be needed if + // we waited to ack the region dir until the directory got all the acks + transition({BR, BW, BI, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, WriteThrough, Atomic}) { + ww_stallAndWaitRegRequestQueue; + } + + transition({BR, BW, BI, BL, BS_M, BM_M, B_M, BS_PM, BM_PM, B_PM, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {RdBlkSP, RdBlkMP, RdBlkP}) { + st_stallAndWaitRequest; + } + + transition({BR, BW, BI, BL, BS_M, BM_M, B_M, BS_PM, BM_PM, B_PM, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {WriteThroughP,AtomicP}) { + st_stallAndWaitRequest; + } + + transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead} { + t_allocateTBE; + l_queueMemRdReq; + sa_setAcks; + o_checkForCompletion; + ra_ackRegionDir; + prd_popRegionQueue; + } + + transition(U, WriteThrough, BM_PM){L3TagArrayRead} { + t_allocateTBE; + w_sendResponseWBAck; + l_queueMemRdReq; + sa_setAcks; + o_checkForCompletion; + ra_ackRegionDir; + prd_popRegionQueue; + } + + transition(U, {RdBlkM,Atomic}, BM_PM){L3TagArrayRead} { + t_allocateTBE; + l_queueMemRdReq; + sa_setAcks; + o_checkForCompletion; + ra_ackRegionDir; + prd_popRegionQueue; + } + + transition(U, RdBlk, B_PM){L3TagArrayRead} { + t_allocateTBE; + l_queueMemRdReq; + sa_setAcks; + o_checkForCompletion; + ra_ackRegionDir; + prd_popRegionQueue; + } + + transition(U, {RdBlkSP}, BS_M) {L3TagArrayRead} { + tp_allocateTBEP; + lrp_queueMemRdReqP; + p_popRequestQueue; + } + + transition(U, WriteThroughP, BM_M) {L3TagArrayRead} { + tp_allocateTBEP; + wp_sendResponseWBAckP; + lrp_queueMemRdReqP; + p_popRequestQueue; + } + + transition(U, {RdBlkMP,AtomicP}, BM_M) {L3TagArrayRead} { + tp_allocateTBEP; + lrp_queueMemRdReqP; + p_popRequestQueue; + } + + transition(U, RdBlkP, B_M) {L3TagArrayRead} { + tp_allocateTBEP; + lrp_queueMemRdReqP; + p_popRequestQueue; + } + + transition(U, VicDirtyP, BL) {L3TagArrayRead} { + tp_allocateTBEP; + 
wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition(U, VicCleanP, BL) {L3TagArrayRead} { + tp_allocateTBEP; + wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition(BM_Pm, RdBlkSP, BM_Pm_B) {L3DataArrayWrite} { + sb_sendResponseSBypass; + p_popRequestQueue; + } + + transition(BS_Pm, RdBlkSP, BS_Pm_B) {L3DataArrayWrite} { + sb_sendResponseSBypass; + p_popRequestQueue; + } + + transition(B_Pm, RdBlkSP, B_Pm_B) {L3DataArrayWrite} { + sb_sendResponseSBypass; + p_popRequestQueue; + } + + transition(BP, RdBlkSP, BP_B) {L3DataArrayWrite} { + sb_sendResponseSBypass; + p_popRequestQueue; + } + + transition(BM_Pm, RdBlkMP, BM_Pm_B) {L3DataArrayWrite} { + mb_sendResponseMBypass; + p_popRequestQueue; + } + + transition(BS_Pm, RdBlkMP, BS_Pm_B) {L3DataArrayWrite} { + mb_sendResponseMBypass; + p_popRequestQueue; + } + + transition(B_Pm, RdBlkMP, B_Pm_B) {L3DataArrayWrite} { + mb_sendResponseMBypass; + p_popRequestQueue; + } + + transition(BP, RdBlkMP, BP_B) {L3DataArrayWrite} { + mb_sendResponseMBypass; + p_popRequestQueue; + } + + transition(BM_Pm, {WriteThroughP,AtomicP}, BM_Pm_B) {L3DataArrayWrite} { + wdp_writeBackDataPrivate; + mbwt_sendResponseWriteThroughBypass; + p_popRequestQueue; + } + + transition(BS_Pm, {WriteThroughP,AtomicP}, BS_Pm_B) {L3DataArrayWrite} { + wdp_writeBackDataPrivate; + mbwt_sendResponseWriteThroughBypass; + p_popRequestQueue; + } + + transition(B_Pm, {WriteThroughP,AtomicP}, B_Pm_B) {L3DataArrayWrite} { + wdp_writeBackDataPrivate; + mbwt_sendResponseWriteThroughBypass; + p_popRequestQueue; + } + + transition(BP, {WriteThroughP,AtomicP}, BP_B) {L3DataArrayWrite} { + wdp_writeBackDataPrivate; + mbwt_sendResponseWriteThroughBypass; + p_popRequestQueue; + } + + transition(BM_Pm, RdBlkP, BM_Pm_B) {L3DataArrayWrite} { + esb_sendResponseESBypass; + p_popRequestQueue; + } + + transition(BS_Pm, RdBlkP, BS_Pm_B) {L3DataArrayWrite} { + esb_sendResponseESBypass; + p_popRequestQueue; + } + + transition(B_Pm, RdBlkP, B_Pm_B) {L3DataArrayWrite}{ 
+ esb_sendResponseESBypass; + p_popRequestQueue; + } + + transition(BP, RdBlkP, BP_B) {L3DataArrayWrite}{ + esb_sendResponseESBypass; + p_popRequestQueue; + } + + transition(BM_Pm_B, CoreUnblock, BM_Pm) { + wa_wakeUpDependents; + pu_popUnblockQueue; + } + + transition(BS_Pm_B, CoreUnblock, BS_Pm) { + wa_wakeUpDependents; + pu_popUnblockQueue; + } + + transition(B_Pm_B, CoreUnblock, B_Pm) { + wa_wakeUpDependents; + pu_popUnblockQueue; + } + + transition(BP_B, CoreUnblock, BP) { + wa_wakeUpDependents; + pu_popUnblockQueue; + } + + transition(BM_Pm_B, UnblockWriteThrough, BM_Pm) { + wa_wakeUpDependents; + pt_popTriggerQueue; + } + + transition(BS_Pm_B, UnblockWriteThrough, BS_Pm) { + wa_wakeUpDependents; + pt_popTriggerQueue; + } + + transition(B_Pm_B, UnblockWriteThrough, B_Pm) { + wa_wakeUpDependents; + pt_popTriggerQueue; + } + + transition(BP_B, UnblockWriteThrough, BP) { + wa_wakeUpDependents; + pt_popTriggerQueue; + } + + transition(BM_Pm, VicDirtyP, BM_Pm_BL) { + wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition(BS_Pm, VicDirtyP, BS_Pm_BL) { + wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition(B_Pm, VicDirtyP, B_Pm_BL) { + wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition(BP, VicDirtyP, BP_BL) { + wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition(BM_Pm, VicCleanP, BM_Pm_BL) { + wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition(BS_Pm, VicCleanP, BS_Pm_BL) { + wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition(B_Pm, VicCleanP, B_Pm_BL) { + wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition(BP, VicCleanP, BP_BL) { + wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition(BM_Pm_BL, CPUData, BM_Pm) { + yc_writeCPUDataToTBE; + d_writeDataToMemory; + wa_wakeUpDependents; + pr_popResponseQueue; + } + + transition(BS_Pm_BL, CPUData, BS_Pm) { + yc_writeCPUDataToTBE; + d_writeDataToMemory; + wa_wakeUpDependents; + pr_popResponseQueue; + } + + transition(B_Pm_BL, CPUData, B_Pm) { 
+ yc_writeCPUDataToTBE; + d_writeDataToMemory; + wa_wakeUpDependents; + pr_popResponseQueue; + } + + transition(BP_BL, CPUData, BP) { + yc_writeCPUDataToTBE; + d_writeDataToMemory; + wa_wakeUpDependents; + pr_popResponseQueue; + } + + transition({BR, BW, BL}, {VicDirtyP, VicCleanP}) { + st_stallAndWaitRequest; + } + + transition({BR, BW, BL}, {VicDirty, VicClean}) { + ww_stallAndWaitRegRequestQueue; + } + + transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} { + dt_deallocateTBE; + d_writeDataToMemory; + al_allocateL3Block; + wa_wakeUpDependents; + pr_popResponseQueue; + } + + transition(BL, StaleWB, U) {L3TagArrayWrite} { + dt_deallocateTBE; + wa_wakeUpAllDependents; + pr_popResponseQueue; + } + + transition({BI, B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {VicDirty, VicClean}) { + ww_stallAndWaitRegRequestQueue; + } + + transition({BI, B, BS_M, BM_M, B_M, BS_PM, BM_PM, B_PM, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {VicDirtyP, VicCleanP}) { + st_stallAndWaitRequest; + } + + transition({U, BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, WBAck) { + pm_popMemQueue; + } + + transition({U, BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, StaleVicDirtyP) { + rvp_removeVicDirtyIgnore; + wp_sendResponseWBAckP; + p_popRequestQueue; + } + + transition({U, BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, StaleVicDirty) { + rv_removeVicDirtyIgnore; + w_sendResponseWBAck; + prd_popRegionQueue; + } + + transition(U, VicDirty, BL) {L3TagArrayRead} { + t_allocateTBE; + ra_ackRegionDir; + w_sendResponseWBAck; + prd_popRegionQueue; + } + + transition(U, 
VicClean, BL) {L3TagArrayRead} { + t_allocateTBE; + ra_ackRegionDir; + w_sendResponseWBAck; + prd_popRegionQueue; + } + + transition({B, BR}, CoreUnblock, U) { + wa_wakeUpDependents; + pu_popUnblockQueue; + } + + transition({B, BR}, UnblockWriteThrough, U) { + wa_wakeUpDependents; + pt_popTriggerQueue; + } + + transition(BS_M, MemData, B) {L3TagArrayWrite, L3DataArrayWrite} { + mt_writeMemDataToTBE; + s_sendResponseS; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pm_popMemQueue; + } + + transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { + mt_writeMemDataToTBE; + m_sendResponseM; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pm_popMemQueue; + } + + transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { + mt_writeMemDataToTBE; + es_sendResponseES; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pm_popMemQueue; + } + + transition(BS_PM, MemData, BS_Pm) {} { + mt_writeMemDataToTBE; + wa_wakeUpDependents; + pm_popMemQueue; + } + + transition(BM_PM, MemData, BM_Pm){} { + mt_writeMemDataToTBE; + wa_wakeUpDependents; + pm_popMemQueue; + } + + transition(B_PM, MemData, B_Pm){} { + mt_writeMemDataToTBE; + wa_wakeUpDependents; + pm_popMemQueue; + } + + transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} { + s_sendResponseS; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + ptl_popTriggerQueue; + } + + transition(BM_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} { + m_sendResponseM; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + ptl_popTriggerQueue; + } + + transition(B_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} { + es_sendResponseES; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + ptl_popTriggerQueue; + } + + transition(BS_PM, L3Hit, BS_Pm) { + wa_wakeUpDependents; + ptl_popTriggerQueue; + } + + transition(BM_PM, L3Hit, BM_Pm) { + wa_wakeUpDependents; + ptl_popTriggerQueue; + } + + transition(B_PM, 
L3Hit, B_Pm) { + wa_wakeUpDependents; + ptl_popTriggerQueue; + } + + transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BP, BI}, CPUPrbResp) { + aic_ackInvalidate; + y_writeProbeDataToTBE; + x_decrementAcks; + ont_checkForCompletionNoTrigger; + pr_popResponseQueue; + } + + transition({B, B_M, BS_M, BM_M}, {CPUPrbResp, LastCPUPrbResp}) { + z_stall; + } + + transition({BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {CPUPrbResp, LastCPUPrbResp}) { + // recycling because PrbResponse and data come on the same network + yy_recycleResponseQueue; + } + + transition(U, {CPUPrbResp, LastCPUPrbResp}) {L3TagArrayRead, L3DataArrayWrite} { + aic_ackInvalidate; + wdt_writeBackDataInvNoTBE; + ali_allocateL3BlockNoTBE; + pr_popResponseQueue; + } + + transition(BL, {CPUPrbResp, LastCPUPrbResp}) {} { + aic_ackInvalidate; + y_writeProbeDataToTBE; + wdi_writeBackDataInv; + ali_allocateL3Block; + pr_popResponseQueue; + } + + transition(BS_PM, LastCPUPrbResp, BS_M) { + aic_ackInvalidate; + y_writeProbeDataToTBE; + x_decrementAcks; + ont_checkForCompletionNoTrigger; + pr_popResponseQueue; + } + + transition(BS_PM, ProbeAcksComplete, BS_M) {} { + pt_popTriggerQueue; + } + + transition(BM_PM, LastCPUPrbResp, BM_M) { + aic_ackInvalidate; + y_writeProbeDataToTBE; + x_decrementAcks; + ont_checkForCompletionNoTrigger; + pr_popResponseQueue; + } + + transition(BM_PM, ProbeAcksComplete, BM_M) {} { + pt_popTriggerQueue; + } + + transition(B_PM, LastCPUPrbResp, B_M) { + aic_ackInvalidate; + y_writeProbeDataToTBE; + x_decrementAcks; + ont_checkForCompletionNoTrigger; + pr_popResponseQueue; + } + + transition(B_PM, ProbeAcksComplete, B_M){} { + pt_popTriggerQueue; + } + + transition(BS_Pm, LastCPUPrbResp, B) { + aic_ackInvalidate; + y_writeProbeDataToTBE; + x_decrementAcks; + ont_checkForCompletionNoTrigger; + s_sendResponseS; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + ali_allocateL3Block; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition(BS_Pm, 
ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { + s_sendResponseS; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + ali_allocateL3Block; + dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(BM_Pm, LastCPUPrbResp, B) { + aic_ackInvalidate; + y_writeProbeDataToTBE; + x_decrementAcks; + ont_checkForCompletionNoTrigger; + m_sendResponseM; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + ali_allocateL3Block; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { + m_sendResponseM; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + ali_allocateL3Block; + dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(B_Pm, LastCPUPrbResp, B) { + aic_ackInvalidate; + y_writeProbeDataToTBE; + x_decrementAcks; + ont_checkForCompletionNoTrigger; + es_sendResponseES; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + ali_allocateL3Block; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { + es_sendResponseES; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + ali_allocateL3Block; + dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(BP, LastCPUPrbResp, B) { + aic_ackInvalidate; + y_writeProbeDataToTBE; + x_decrementAcks; + ont_checkForCompletionNoTrigger; + c_sendResponseCtoD; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition(BP, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { // same resource list as the BS_Pm/BM_Pm/B_Pm ProbeAcksComplete transitions + c_sendResponseCtoD; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(BI, LastCPUPrbResp, B) { // NOTE(review): ProbeAcksComplete from BI ends in U while this ends in B -- confirm the asymmetry is intended + aic_ackInvalidate; + y_writeProbeDataToTBE; + x_decrementAcks; + ont_checkForCompletionNoTrigger; + wa_wakeUpDependents; + wdi_writeBackDataInv; + ali_allocateL3Block; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition(BI, ProbeAcksComplete, U) {L3TagArrayWrite, L3DataArrayWrite}{ + wa_wakeUpDependents; 
+ wdi_writeBackDataInv; + ali_allocateL3Block; + dt_deallocateTBE; + pt_popTriggerQueue; + } + +} diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-msg.sm b/src/mem/protocol/MOESI_AMD_Base-Region-msg.sm new file mode 100644 index 000000000..823933e57 --- /dev/null +++ b/src/mem/protocol/MOESI_AMD_Base-Region-msg.sm @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Lisa Hsu + */ + +enumeration(CoherenceRequestType, desc="Coherence Request Types") { + // CPU Request Types ONLY + RdBlk, desc="Read Blk"; + RdBlkM, desc="Read Blk Modified"; + RdBlkS, desc="Read Blk Shared"; + VicClean, desc="L2 clean eviction"; + VicDirty, desc="L2 dirty eviction"; + + WrCancel, desc="want to cancel WB to Memory"; // should this be here? + + WBApproval, desc="WB Approval"; + + // Messages between Dir and R-Dir + ForceInv, desc="Send invalidate to the block"; + ForceDowngrade, desc="Send downgrade to the block"; + Unblock, desc="Used to let the dir know a message has been sunk"; + + // Messages between R-Dir and R-Buffer + PrivateNotify, desc="Let region buffer know it has private access"; + SharedNotify, desc="Let region buffer know it has shared access"; + WbNotify, desc="Let region buffer know it saw its wb request"; + Downgrade, desc="Force the region buffer to downgrade to shared"; + // Response to R-Dir (probably should be on a different network, but + // I need it to be ordered with respect to requests) + InvAck, desc="Let the R-Dir know when the inv has occurred"; + + PrivateRequest, desc="R-buf wants the region in private"; + UpgradeRequest, desc="R-buf wants the region in private"; + SharedRequest, desc="R-buf wants the region in shared (could respond with private)"; + CleanWbRequest, desc="R-buf wants to deallocate clean region"; + + NA, desc="So we don't get segfaults"; +} + 
+enumeration(ProbeRequestType, desc="Probe Request Types") { + PrbDowngrade, desc="Probe for Status"; // EtoS, MtoO, StoS + PrbInv, desc="Probe to Invalidate"; + + // For regions + PrbRepl, desc="Force the cache to do a replacement"; + PrbRegDowngrade, desc="Probe for Status"; // EtoS, MtoO, StoS +} + + +enumeration(CoherenceResponseType, desc="Coherence Response Types") { + NBSysResp, desc="Northbridge response to CPU Rd request"; + NBSysWBAck, desc="Northbridge response ok to WB"; + TDSysResp, desc="TCCdirectory response to CPU Rd request"; + TDSysWBAck, desc="TCCdirectory response ok to WB"; + TDSysWBNack, desc="TCCdirectory response ok to drop"; + CPUPrbResp, desc="CPU Probe Response"; + CPUData, desc="CPU Data"; + StaleNotif, desc="Notification of Stale WBAck, No data to writeback"; + CPUCancelWB, desc="want to cancel WB to Memory"; + MemData, desc="Data from Memory"; + + // for regions + PrivateAck, desc="Ack that r-buf received private notify"; + RegionWbAck, desc="Writeback Ack that r-buf completed deallocation"; + DirReadyAck, desc="Directory (mem ctrl)<->region dir handshake"; +} + +enumeration(CoherenceState, default="CoherenceState_NA", desc="Coherence State") { + Modified, desc="Modified"; + Owned, desc="Owned state"; + Exclusive, desc="Exclusive"; + Shared, desc="Shared"; + NA, desc="NA"; +} + +structure(CPURequestMsg, desc="...", interface="Message") { + Addr addr, desc="Physical address for this request"; + Addr DemandAddress, desc="Physical block address for this request"; + CoherenceRequestType Type, desc="Type of request"; + DataBlock DataBlk, desc="data for the cache line"; // only for WB + bool Dirty, desc="whether WB data is dirty"; // only for WB + MachineID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Multicast destination mask"; + bool Shared, desc="For CPU_WrVicBlk, vic is O not M. 
For CPU_ClVicBlk, vic is S"; + MessageSizeType MessageSize, desc="size category of the message"; + Cycles InitialRequestTime, default="0", desc="time the initial request was sent from the L1Cache"; + Cycles ForwardRequestTime, default="0", desc="time the dir forwarded the request"; + Cycles ProbeRequestStartTime, default="0", desc="the time the dir started the probe request"; + bool DemandRequest, default="false", desc="For profiling purposes"; + + NetDest Sharers, desc="Caches that may have a valid copy of the data"; + bool ForceShared, desc="R-dir knows it is shared, pass on so it sends an S copy, not E"; + bool Private, default="false", desc="Requestor already has private permissions, no need for dir check"; + bool CtoDSinked, default="false", desc="This is true if the CtoD previously sent must have been sunk"; + + bool NoAckNeeded, default="false", desc="True if region buffer doesn't need to ack"; + int Acks, default="0", desc="Acks that the dir (mem ctrl) should expect to receive"; + CoherenceRequestType OriginalType, default="CoherenceRequestType_NA", desc="Type of request from core fwded through region buffer"; + + bool functionalRead(Packet *pkt) { + // Only VicDirty requests are read here; every other type reports no data + if (Type == CoherenceRequestType:VicDirty) { + return testAndRead(addr, DataBlk, pkt); + } + + return false; + } + + bool functionalWrite(Packet *pkt) { + // No check on message type required since the protocol should + // read data from those messages that contain the block + return testAndWrite(addr, DataBlk, pkt); + } +} + +structure(NBProbeRequestMsg, desc="...", interface="Message") { + Addr addr, desc="Physical address for this request"; + ProbeRequestType Type, desc="probe signal"; + bool ReturnData, desc="Indicates CPU should return data"; + NetDest Destination, desc="Node to whom the data is sent"; + MessageSizeType MessageSize, desc="size category of the message"; + bool DemandRequest, default="false", desc="demand request, requesting 3-hop transfer"; + 
Addr DemandAddress, desc="Demand block address for a region request"; + MachineID Requestor, desc="Requestor id for 3-hop requests"; + bool NoAckNeeded, default="false", desc="For short circuiting acks"; + + bool functionalRead(Packet *pkt) { + return false; + } + + bool functionalWrite(Packet *pkt) { + // This probe request has no data block field, so a functional + // write has nothing to update in the message + return false; + } + +} + +structure(TDProbeRequestMsg, desc="...", interface="Message") { + Addr addr, desc="Physical address for this request"; + ProbeRequestType Type, desc="TD_PrbNxtState signal"; + bool ReturnData, desc="Indicates CPU should return data"; + bool localCtoD, desc="Indicates CtoD is within the GPU hierarchy (aka TCC subtree)"; + NetDest Destination, desc="Node to whom the data is sent"; + MessageSizeType MessageSize, desc="size category of the message"; + MachineID Sender, desc="Node who sent the data"; + bool currentOwner, default="false", desc="Is the sender the current owner"; + bool DoneAck, default="false", desc="Is this a done ack?"; + bool Dirty, default="false", desc="Was block dirty when evicted"; + bool wasValid, default="false", desc="Was block valid when evicted"; + bool valid, default="false", desc="Is block valid"; + bool validToInvalid, default="false", desc="Was block valid when evicted"; + + bool functionalRead(Packet *pkt) { + return false; + } + + bool functionalWrite(Packet *pkt) { + // This probe request has no data block field, so a functional + // write has nothing to update in the message + return false; + } +} + +// Response Messages seemed to be easily munged into one type +structure(ResponseMsg, desc="...", interface="Message") { + Addr addr, desc="Physical address for this request"; + CoherenceResponseType Type, desc="NB Sys Resp or CPU Response to Probe"; + MachineID Sender, desc="Node who sent the data"; + NetDest Destination, desc="Node to whom the data is sent"; + // Begin Used Only By 
CPU Response + DataBlock DataBlk, desc="data for the cache line"; + bool Hit, desc="probe hit valid line"; + bool Shared, desc="True if S, or if NB Probe ReturnData==1 && O"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + bool Ntsl, desc="indicates probed line will be invalid after probe"; + bool UntransferredOwner, desc="pending confirmation of ownership change"; + // End Used Only By CPU Response + + // Begin NB Response Only + CoherenceState State, default=CoherenceState_NA, desc="What returned data from NB should be in"; + bool CtoD, desc="was the originator a CtoD?"; + // End NB Response Only + + bool NbReqShared, desc="modification of Shared field from initial request, e.g. hit by shared probe"; + + MessageSizeType MessageSize, desc="size category of the message"; + Cycles InitialRequestTime, default="0", desc="time the initial request was sent from the L1Cache"; + Cycles ForwardRequestTime, default="0", desc="time the dir forwarded the request"; + Cycles ProbeRequestStartTime, default="0", desc="the time the dir started the probe request"; + bool DemandRequest, default="false", desc="For profiling purposes"; + + bool L3Hit, default="false", desc="Did memory or L3 supply the data?"; + MachineID OriginalResponder, desc="Mach which wrote the data to the L3"; + + bool NotCached, default="false", desc="True when the Region buffer has already evicted the line"; + + bool NoAckNeeded, default="false", desc="For short circuiting acks"; + bool isValid, default="false", desc="Is acked block valid"; + + bool functionalRead(Packet *pkt) { + // Only CPUData and MemData responses are read here; other types report no data + if (Type == CoherenceResponseType:CPUData || + Type == CoherenceResponseType:MemData) { + return testAndRead(addr, DataBlk, pkt); + } + + return false; + } + + bool functionalWrite(Packet *pkt) { + // No check on message type required since the protocol should + // read data from those messages that contain the block + return testAndWrite(addr, DataBlk, pkt); + } +} + 
+structure(UnblockMsg, desc="...", interface="Message") { + Addr addr, desc="Physical address for this request"; + NetDest Destination, desc="Destination (always directory)"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +enumeration(TriggerType, desc="Trigger Type") { + L2_to_L1, desc="L2 to L1 fill"; + AcksComplete, desc="NB received all needed Acks"; + + // For regions + InvNext, desc="Invalidate the next block"; + PrivateAck, desc="Loopback ack for machines with no Region Buffer"; + AllOutstanding, desc="All outstanding requests have finished"; + L3Hit, desc="L3 hit in dir"; + + // For region directory once the directory is blocked + InvRegion, desc="Invalidate region"; + DowngradeRegion, desc="downgrade region"; +} + +enumeration(CacheId, desc="Which Cache in the Core") { + L1I, desc="L1 I-cache"; + L1D0, desc="L1 D-cache cluster 0"; + L1D1, desc="L1 D-cache cluster 1"; + NA, desc="Default"; +} + +structure(TriggerMsg, desc="...", interface="Message") { + Addr addr, desc="Address"; + TriggerType Type, desc="Type of trigger"; + CacheId Dest, default="CacheId_NA", desc="Cache to invalidate"; + + bool functionalRead(Packet *pkt) { + return false; + } + + bool functionalWrite(Packet *pkt) { + // A trigger message has no data block field, so a functional + // write has nothing to update in the message + return false; + } + +} diff --git a/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm b/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm new file mode 100644 index 000000000..89f7d6fcb --- /dev/null +++ b/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm @@ -0,0 +1,1368 @@ +/* + * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Jason Power + */ + +machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol") +: CacheMemory *cacheMemory; // stores only region addresses. 
Must set block size same as below + bool isOnCPU; + int blocksPerRegion := 64; // 4k regions + Cycles toDirLatency := 5; // Latency to fwd requests to directory + Cycles toRegionDirLatency := 5; // Latency for requests and acks to directory + Cycles nextEvictLatency := 1; // latency added between each block while evicting region + bool noTCCdir := "False"; + int TCC_select_num_bits := 1; + + // From the Cores + MessageBuffer * requestFromCore, network="From", virtual_network="0", vnet_type="request"; + MessageBuffer * responseFromCore, network="From", virtual_network="2", vnet_type="response"; + + // Requests to the cores or directory + MessageBuffer * requestToNetwork, network="To", virtual_network="0", vnet_type="request"; + + // From Region-Dir + MessageBuffer * notifyFromRegionDir, network="From", virtual_network="7", vnet_type="request"; + MessageBuffer * probeFromRegionDir, network="From", virtual_network="8", vnet_type="request"; + + // From the directory + MessageBuffer * unblockFromDir, network="From", virtual_network="4", vnet_type="unblock"; + + // To the region-Dir + MessageBuffer * responseToRegDir, network="To", virtual_network="2", vnet_type="response"; + + MessageBuffer * triggerQueue; +{ + + // States + state_declaration(State, desc="Region states", default="RegionBuffer_State_NP") { + NP, AccessPermission:Invalid, desc="Not present in region directory"; + P, AccessPermission:Invalid, desc="Region is private to the cache"; + S, AccessPermission:Invalid, desc="Region is possibly shared with others"; + + NP_PS, AccessPermission:Invalid, desc="Intermediate state waiting for notify from r-dir"; + S_P, AccessPermission:Invalid, desc="Intermediate state while upgrading region"; + + P_NP, AccessPermission:Invalid, desc="Intermediate state while evicting all lines in region"; + P_S, AccessPermission:Invalid, desc="Intermediate state while downgrading all lines in region"; + + S_NP_PS, AccessPermission:Invalid, desc="Got an inv in S_P, waiting for all inv 
acks, then going to NP_PS since the write is already out there"; + P_NP_NP, AccessPermission:Invalid, desc="Evicting region on repl, then got an inv. Need to re-evict"; + + P_NP_O, AccessPermission:Invalid, desc="Waiting for all outstanding requests"; + P_S_O, AccessPermission:Invalid, desc="Waiting for all outstanding requests"; + S_O, AccessPermission:Invalid, desc="Waiting for all outstanding requests"; + S_NP_PS_O, AccessPermission:Invalid, desc="Waiting for all outstanding requests"; + + SS_P, AccessPermission:Invalid, desc="Waiting for CPU write that we know is there"; + + P_NP_W, AccessPermission:Invalid, desc="Waiting for writeback ack"; + + NP_W, AccessPermission:Invalid, desc="Got a done ack before request, waiting for that victim"; + } + + enumeration(Event, desc="Region buffer events") { + CPURead, desc="Access from CPU core"; + CPUWrite, desc="Access from CPU core"; + CPUWriteback, desc="Writeback request from CPU core"; + + ReplRegion, desc="Start a replace on a region"; + + PrivateNotify, desc="Update entry to private state"; + SharedNotify, desc="Update entry to shared state"; + WbNotify, desc="Writeback notification received"; + InvRegion, desc="Start invalidating a region"; + DowngradeRegion,desc="Start downgrading a region"; + + InvAck, desc="Ack from core"; + + DoneAck, desc="Ack from core that request has finished"; + AllOutstanding, desc="All outstanding requests have now finished"; + + Evict, desc="Loopback to evict each block"; + LastAck_PrbResp, desc="Done evicting all the blocks, got the last ack from core, now respond to region dir"; + LastAck_CleanWb, desc="Done evicting all the blocks, got the last ack from core, now start clean writeback (note the dir has already been updated)"; + + StallAccess, desc="Wait for the done ack on the address before proceeding"; + StallDoneAck, desc="Wait for the access on the address before proceeding"; + + StaleRequest, desc="Got a stale victim from the cache, fwd it without incrementing 
outstanding"; + } + + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { + TagArrayRead, desc="Read the data array"; + TagArrayWrite, desc="Write the data array"; + } + + structure(BoolVec, external="yes") { + bool at(int); + void resize(int); + void clear(); + int size(); + } + + structure(Entry, desc="Region entry", interface="AbstractCacheEntry") { + Addr addr, desc="Base address of this region"; + State RegionState, desc="Region state"; + DataBlock DataBlk, desc="Data for the block (always empty in region buffer)"; + BoolVec ValidBlocks, desc="A vector to keep track of valid blocks"; + int NumValidBlocks, desc="Number of trues in ValidBlocks to avoid iterating"; + BoolVec UsedBlocks, desc="A vector to keep track of blocks ever valid"; + bool dirty, desc="Dirty as best known by the region buffer"; + // This is needed so we don't ack an invalidate until all requests are ordered + int NumOutstandingReqs, desc="Total outstanding private/shared requests"; + BoolVec OutstandingReqs, desc="Blocks that have outstanding private/shared requests"; + bool MustDowngrade, desc="Set when we got a downgrade before the shd or pvt permissions"; + Cycles ProbeRequestTime, default="Cycles(0)", desc="Time region dir started the probe"; + Cycles InitialRequestTime, default="Cycles(0)", desc="Time message was sent to region dir"; + bool MsgSentToDir, desc="True if the current request required a message to the dir"; + bool clearOnDone, default="false", desc="clear valid bit when request completes"; + Addr clearOnDoneAddr, desc="clear valid bit when request completes"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + //int NumValidBlocks, desc="Number of blocks valid so we don't have to count a BoolVec"; + BoolVec ValidBlocks, desc="A vector to keep track of valid blocks"; + bool AllAcksReceived, desc="Got all necessary acks from dir"; + bool DoneEvicting, desc="Done iterating through blocks checking for valids"; + 
BoolVec AcksReceived, desc="Received acks for theses blocks\n"; + bool SendAck, desc="If true, send an ack to the r-dir at end of inv"; + ProbeRequestType MsgType, desc="Type of message to send while 'evicting' "; + int NumOutstandingReqs, desc="Total outstanding private/shared requests"; + BoolVec OutstandingReqs, desc="Blocks that have outstanding private/shared requests"; + MachineID Requestor, desc="Requestor for three hop transactions"; + bool DemandRequest, default="false", desc="Associated with a demand request"; + Addr DemandAddress, desc="Address for the demand request"; + bool DoneAckReceived, default="false", desc="True if the done ack arrived before the message"; + Addr DoneAckAddr, desc="Address of the done ack received early"; + int OutstandingThreshold, desc="Number of outstanding requests to trigger AllOutstanding on"; + + ProbeRequestType NewMsgType, desc="Type of message to send while 'evicting' "; + MachineID NewRequestor, desc="Requestor for three hop transactions"; + bool NewDemandRequest, default="false", desc="Associated with a demand request"; + Addr NewDemandAddress, desc="Address for the demand request"; + bool dirty, desc="dirty"; + bool AllOutstandingTriggered, default="false", desc="bit for only one all outstanding"; + int OutstandingAcks, default="0", desc="number of acks to wait for"; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + // Stores only region addresses + TBETable TBEs, template="<RegionBuffer_TBE>", constructor="m_number_of_TBEs"; + int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + + Tick clockEdge(); + Tick cyclesToTicks(Cycles c); + + void set_cache_entry(AbstractCacheEntry b); + void unset_cache_entry(); + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + Cycles curCycle(); + + int blockBits, default="RubySystem::getBlockSizeBits()"; + int blockBytes, 
default="RubySystem::getBlockSizeBytes()"; + int regionBits, default="log2(m_blocksPerRegion)"; + + // Functions + + int getRegionOffset(Addr addr) { + if (blocksPerRegion > 1) { + Addr offset := bitSelect(addr, blockBits, regionBits+blockBits-1); + int ret := addressToInt(offset); + assert(ret < blocksPerRegion); + return ret; + } else { + return 0; + } + } + + Addr getRegionBase(Addr addr) { + return maskLowOrderBits(addr, blockBits+regionBits); + } + + Addr getNextBlock(Addr addr) { + Addr a := addr; + return makeNextStrideAddress(a, 1); + } + + MachineID getPeer(MachineID mach, Addr address) { + if (isOnCPU) { + return createMachineID(MachineType:CorePair, intToID(0)); + } else if (noTCCdir) { + return mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits); + } else { + return createMachineID(MachineType:TCCdir, intToID(0)); + } + } + + bool isOutstanding(TBE tbe, Entry cache_entry, Addr addr) { + if (is_valid(tbe) && tbe.OutstandingReqs.size() > 0) { + DPRINTF(RubySlicc, " outstanding tbe reqs %s %s %d %d\n", + tbe.OutstandingReqs, addr, getRegionOffset(addr), + tbe.OutstandingReqs.at(getRegionOffset(addr))); + return tbe.OutstandingReqs.at(getRegionOffset(addr)); + } else if (is_valid(cache_entry)) { + DPRINTF(RubySlicc, " outstanding cache reqs %s %s %d %d\n", + cache_entry.OutstandingReqs, addr, getRegionOffset(addr), + cache_entry.OutstandingReqs.at(getRegionOffset(addr))); + return cache_entry.OutstandingReqs.at(getRegionOffset(addr)); + } else { + return false; + } + } + + bool isOnGPU() { + if (isOnCPU) { + return false; + } + return true; + } + + bool isRead(CoherenceRequestType type) { + return (type == CoherenceRequestType:RdBlk || type == CoherenceRequestType:RdBlkS || + type == CoherenceRequestType:VicClean); + } + + bool presentOrAvail(Addr addr) { + return cacheMemory.isTagPresent(getRegionBase(addr)) || cacheMemory.cacheAvail(getRegionBase(addr)); + } + + // Returns a region entry! 
+ Entry getCacheEntry(Addr addr), return_by_pointer="yes" { + return static_cast(Entry, "pointer", cacheMemory.lookup(getRegionBase(addr))); + } + + TBE getTBE(Addr addr), return_by_pointer="yes" { + return TBEs.lookup(getRegionBase(addr)); + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + return getCacheEntry(getRegionBase(addr)).DataBlk; + } + + State getState(TBE tbe, Entry cache_entry, Addr addr) { + if (is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.RegionState; + } + return State:NP; + } + + void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState := state; + } + if (is_valid(cache_entry)) { + cache_entry.RegionState := state; + } + } + + AccessPermission getAccessPermission(Addr addr) { + TBE tbe := getTBE(addr); + if(is_valid(tbe)) { + return RegionBuffer_State_to_permission(tbe.TBEState); + } + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + return RegionBuffer_State_to_permission(cache_entry.RegionState); + } + return AccessPermission:NotPresent; + } + + void functionalRead(Addr addr, Packet *pkt) { + functionalMemoryRead(pkt); + } + + int functionalWrite(Addr addr, Packet *pkt) { + if (functionalMemoryWrite(pkt)) { + return 1; + } else { + return 0; + } + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(RegionBuffer_State_to_permission(state)); + } + } + + void recordRequestType(RequestType stat, Addr addr) { + if (stat == RequestType:TagArrayRead) { + cacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (stat == RequestType:TagArrayWrite) { + cacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:TagArrayRead) { + return 
cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:TagArrayWrite) { + return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } + } + + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + + // Overloaded outgoing request network for both probes to cores and requests + // to the directory. + // Fix Me: These forwarded requests need to be on a separate virtual channel + // to avoid deadlock! + out_port(requestNetwork_out, CPURequestMsg, requestToNetwork); + out_port(probeNetwork_out, NBProbeRequestMsg, requestToNetwork); + + out_port(responseNetwork_out, ResponseMsg, responseToRegDir); + + in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=4) { + if (triggerQueue_in.isReady(clockEdge())) { + peek(triggerQueue_in, TriggerMsg) { + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := getTBE(in_msg.addr); + DPRINTF(RubySlicc, "trigger msg: %s (%s)\n", in_msg, getRegionBase(in_msg.addr)); + assert(is_valid(tbe)); + if (in_msg.Type == TriggerType:AcksComplete) { + if (tbe.SendAck) { + trigger(Event:LastAck_PrbResp, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:LastAck_CleanWb, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == TriggerType:AllOutstanding) { + trigger(Event:AllOutstanding, in_msg.addr, cache_entry, tbe); + } else { + assert(in_msg.Type == TriggerType:InvNext); + trigger(Event:Evict, in_msg.addr, cache_entry, tbe); + } + } + } + } + + in_port(unblockNetwork_in, UnblockMsg, unblockFromDir, rank=3) { + if (unblockNetwork_in.isReady(clockEdge())) { + peek(unblockNetwork_in, UnblockMsg) { + TBE tbe := getTBE(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (in_msg.DoneAck) { + if (isOutstanding(tbe, cache_entry, in_msg.addr)) { + trigger(Event:DoneAck, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:StallDoneAck, 
in_msg.addr, cache_entry, tbe); + } + } else { + assert(is_valid(tbe)); + trigger(Event:InvAck, in_msg.addr, cache_entry, tbe); + } + } + } + } + + in_port(probeNetwork_in, NBProbeRequestMsg, probeFromRegionDir, rank=2) { + if (probeNetwork_in.isReady(clockEdge())) { + peek(probeNetwork_in, NBProbeRequestMsg) { + TBE tbe := getTBE(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + assert(getRegionBase(in_msg.addr) == in_msg.addr); + if (in_msg.Type == ProbeRequestType:PrbInv) { + trigger(Event:InvRegion, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) { + trigger(Event:DowngradeRegion, in_msg.addr, cache_entry, tbe); + } else { + error("Unknown probe message\n"); + } + } + } + } + + in_port(notifyNetwork_in, CPURequestMsg, notifyFromRegionDir, rank=1) { + if (notifyNetwork_in.isReady(clockEdge())) { + peek(notifyNetwork_in, CPURequestMsg) { + TBE tbe := getTBE(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + //Fix Me...add back in: assert(is_valid(cache_entry)); + if (in_msg.Type == CoherenceRequestType:WbNotify) { + trigger(Event:WbNotify, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:SharedNotify) { + trigger(Event:SharedNotify, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:PrivateNotify) { + trigger(Event:PrivateNotify, in_msg.addr, cache_entry, tbe); + } else { + error("Unknown notify message\n"); + } + } + } + } + + // In from cores + // NOTE: We get the cache / TBE entry based on the region address, + // but pass the block address to the actions + in_port(requestNetwork_in, CPURequestMsg, requestFromCore, rank=0) { + if (requestNetwork_in.isReady(clockEdge())) { + peek(requestNetwork_in, CPURequestMsg) { + TBE tbe := getTBE(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (is_valid(tbe) && tbe.DoneAckReceived && tbe.DoneAckAddr == in_msg.addr) { + DPRINTF(RubySlicc, "Stale/Stall request %s\n", 
in_msg.Type); + if (in_msg.Type == CoherenceRequestType:VicDirty || in_msg.Type == CoherenceRequestType:VicClean ) + { + trigger(Event:StaleRequest, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:StallAccess, in_msg.addr, cache_entry, tbe); + } + } else if (isOutstanding(tbe, cache_entry, in_msg.addr)) { + DPRINTF(RubySlicc, "Stall outstanding request %s\n", in_msg.Type); + trigger(Event:StallAccess, in_msg.addr, cache_entry, tbe); + } else { + if (presentOrAvail(in_msg.addr)) { + if (in_msg.Type == CoherenceRequestType:RdBlkM ) { + trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:WriteThrough ) { + trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:Atomic ) { + trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); + } else { + if (in_msg.Type == CoherenceRequestType:VicDirty || + in_msg.Type == CoherenceRequestType:VicClean) { + trigger(Event:CPUWriteback, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:CPURead, in_msg.addr, cache_entry, tbe); + } + } + } else { + Addr victim := cacheMemory.cacheProbe(getRegionBase(in_msg.addr)); + TBE victim_tbe := getTBE(victim); + Entry victim_entry := getCacheEntry(victim); + DPRINTF(RubySlicc, "Replacing region %s for %s(%s)\n", victim, in_msg.addr, getRegionBase(in_msg.addr)); + trigger(Event:ReplRegion, victim, victim_entry, victim_tbe); + } + } + } + } + } + + // Actions + action(f_fwdReqToDir, "f", desc="Forward CPU request to directory") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) { + out_msg.addr := in_msg.addr; + out_msg.Type := in_msg.Type; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Dirty := in_msg.Dirty; + out_msg.Requestor := in_msg.Requestor; + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.Destination.add(map_Address_to_Directory(in_msg.addr)); + out_msg.Shared := in_msg.Shared; + out_msg.MessageSize := 
in_msg.MessageSize; + out_msg.Private := true; + out_msg.InitialRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := curCycle(); + if (getState(tbe, cache_entry, address) == State:S) { + out_msg.ForceShared := true; + } + DPRINTF(RubySlicc, "Fwd: %s\n", out_msg); + //assert(getState(tbe, cache_entry, address) == State:P || getState(tbe, cache_entry, address) == State:S); + if (getState(tbe, cache_entry, address) == State:NP_W) { + APPEND_TRANSITION_COMMENT(" fwding stale request: "); + APPEND_TRANSITION_COMMENT(out_msg.Type); + } + } + } + } + + action(u_updateRegionEntry, "u", desc="Update the entry for profiling") { + peek(requestNetwork_in, CPURequestMsg) { + if (is_valid(cache_entry)) { + if (in_msg.CtoDSinked == false) { + APPEND_TRANSITION_COMMENT(" incr outstanding "); + cache_entry.NumOutstandingReqs := 1 + cache_entry.NumOutstandingReqs; + assert(cache_entry.OutstandingReqs.at(getRegionOffset(address)) == false); + cache_entry.OutstandingReqs.at(getRegionOffset(address)) := true; + assert(cache_entry.NumOutstandingReqs == countBoolVec(cache_entry.OutstandingReqs)); + } else { + APPEND_TRANSITION_COMMENT(" NOT incr outstanding "); + assert(in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:RdBlkS); + } + APPEND_TRANSITION_COMMENT(cache_entry.NumOutstandingReqs); + if (in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:WriteThrough ) + { + cache_entry.dirty := true; + } + if (in_msg.Type == CoherenceRequestType:VicDirty || + in_msg.Type == CoherenceRequestType:VicClean) { + DPRINTF(RubySlicc, "Got %s for addr %s\n", in_msg.Type, address); + //assert(cache_entry.ValidBlocks.at(getRegionOffset(address))); + // can in fact be inv if core got an inv after a vicclean before it got here + if (cache_entry.ValidBlocks.at(getRegionOffset(address))) { + cache_entry.clearOnDone := true; + cache_entry.clearOnDoneAddr := address; + 
//cache_entry.ValidBlocks.at(getRegionOffset(address)) := false; + //cache_entry.NumValidBlocks := cache_entry.NumValidBlocks - 1; + } + } else { + if (cache_entry.ValidBlocks.at(getRegionOffset(address)) == false) { + cache_entry.NumValidBlocks := cache_entry.NumValidBlocks + 1; + } + DPRINTF(RubySlicc, "before valid addr %s bits %s\n", + in_msg.Type, address, cache_entry.ValidBlocks); + cache_entry.ValidBlocks.at(getRegionOffset(address)) := true; + DPRINTF(RubySlicc, "after valid addr %s bits %s\n", + in_msg.Type, address, cache_entry.ValidBlocks); + cache_entry.UsedBlocks.at(getRegionOffset(address)) := true; + } + assert(cache_entry.NumValidBlocks <= blocksPerRegion); + assert(cache_entry.NumValidBlocks >= 0); + APPEND_TRANSITION_COMMENT(" valid blocks "); + APPEND_TRANSITION_COMMENT(cache_entry.ValidBlocks); + } else { + error("This shouldn't happen anymore I think"); + //tbe.ValidBlocks.at(getRegionOffest(address)) := true; + assert(getState(tbe, cache_entry, address) == State:P_NP); + } + } + } + + action(uw_updatePossibleWriteback, "uw", desc="writeback request complete") { + peek(unblockNetwork_in, UnblockMsg) { + if (is_valid(cache_entry) && in_msg.validToInvalid && + cache_entry.clearOnDone && cache_entry.clearOnDoneAddr == address) { + DPRINTF(RubySlicc, "I have no idea what is going on here\n"); + cache_entry.ValidBlocks.at(getRegionOffset(address)) := false; + cache_entry.NumValidBlocks := cache_entry.NumValidBlocks - 1; + cache_entry.clearOnDone := false; + } + } + } + + + action(rp_requestPrivate, "rp", desc="Send private request r-dir") { + peek(requestNetwork_in, CPURequestMsg) { + // No need to send acks on replacements + assert(is_invalid(tbe)); + enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) { + out_msg.addr := address; // use the actual address so the demand request can be fulfilled + out_msg.DemandAddress := address; + out_msg.Type := CoherenceRequestType:PrivateRequest; + out_msg.OriginalType := in_msg.Type; + 
out_msg.Requestor := machineID; + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.InitialRequestTime := curCycle(); + // will this always be ok? probably not for multisocket + out_msg.Destination.add(map_Address_to_RegionDir(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + DPRINTF(RubySlicc, "Private request %s\n", out_msg); + } + cache_entry.ProbeRequestTime := curCycle(); + cache_entry.MsgSentToDir := true; + APPEND_TRANSITION_COMMENT(getRegionBase(address)); + } + } + + action(ru_requestUpgrade, "ru", desc="Send upgrade request r-dir") { + peek(requestNetwork_in, CPURequestMsg) { + // No need to send acks on replacements + assert(is_invalid(tbe)); + enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) { + out_msg.addr := address; // use the actual address so the demand request can be fulfilled + out_msg.Type := CoherenceRequestType:UpgradeRequest; + out_msg.OriginalType := in_msg.Type; + out_msg.Requestor := machineID; + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.InitialRequestTime := curCycle(); + // will this always be ok? probably not for multisocket + out_msg.Destination.add(map_Address_to_RegionDir(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + } + cache_entry.ProbeRequestTime := curCycle(); + cache_entry.MsgSentToDir := true; + APPEND_TRANSITION_COMMENT(getRegionBase(address)); + } + } + + action(rw_requestWriteback, "rq", desc="Send writeback request") { + // No need to send acks on replacements + enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) { + out_msg.addr := getRegionBase(address); // use the actual address so the demand request can be fulfilled + out_msg.Type := CoherenceRequestType:CleanWbRequest; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? 
probably not for multisocket + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Dirty := tbe.dirty; + APPEND_TRANSITION_COMMENT(getRegionBase(address)); + } + } + + action(rs_requestShared, "rs", desc="Send shared request r-dir") { + peek(requestNetwork_in, CPURequestMsg) { + // No need to send acks on replacements + assert(is_invalid(tbe)); + enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) { + out_msg.addr := address; // use the actual address so the demand request can be fulfilled + out_msg.Type := CoherenceRequestType:SharedRequest; + out_msg.OriginalType := in_msg.Type; + out_msg.Requestor := machineID; + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.InitialRequestTime := curCycle(); + // will this always be ok? probably not for multisocket + out_msg.Destination.add(map_Address_to_RegionDir(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + } + cache_entry.ProbeRequestTime := curCycle(); + cache_entry.MsgSentToDir := true; + APPEND_TRANSITION_COMMENT(getRegionBase(address)); + } + } + + action(ai_ackRegionInv, "ai", desc="Send ack to r-dir on region inv if tbe says so") { + // No need to send acks on replacements + assert(is_valid(tbe)); + enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? 
probably not for multisocket + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(ad_ackDircetory, "ad", desc="send probe response to directory") { + if (noTCCdir && tbe.MsgType == ProbeRequestType:PrbDowngrade && isOnGPU()) { //VIPER tcc doesnt understand PrbShrData + assert(tbe.DemandRequest); //So, let RegionBuffer take care of sending back ack + enqueue(responseNetwork_out, ResponseMsg, toDirLatency) { + out_msg.addr := tbe.DemandAddress; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := getPeer(machineID,address); + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + out_msg.Dirty := false; // only true if sending back data i think + out_msg.Hit := false; + out_msg.Ntsl := false; + out_msg.State := CoherenceState:NA; + out_msg.NoAckNeeded := true; + out_msg.MessageSize := MessageSizeType:Response_Control; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + + action(aie_ackRegionExclusiveInv, "aie", desc="Send ack to r-dir on region inv if tbe says so") { + // No need to send acks on replacements + assert(is_valid(tbe)); + enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.NotCached := true; + out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.Dirty := tbe.dirty; + } + } + + action(ain_ackRegionInvNow, "ain", desc="Send ack to r-dir on region inv") { + enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? 
probably not for multisocket + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(aine_ackRegionInvExlusiveNow, "aine", desc="Send ack to r-dir on region inv with exlusive permission") { + enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceResponseType:CPUPrbResp; + out_msg.Sender := machineID; + out_msg.NotCached := true; + out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(ap_ackPrivateNotify, "ap", desc="Send ack to r-dir on private notify") { + enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceResponseType:PrivateAck; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(aw_ackWbNotify, "aw", desc="Send ack to r-dir on writeback notify") { + peek(notifyNetwork_in, CPURequestMsg) { + if (in_msg.NoAckNeeded == false) { + enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceResponseType:RegionWbAck; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? 
probably not for multisocket + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + } + + action(e_evictCurrent, "e", desc="Evict this block in the region") { + // send force invalidate message to directory to invalidate this block + // must invalidate all blocks since region buffer could have privitized it + if (tbe.ValidBlocks.at(getRegionOffset(address)) && + (tbe.DemandRequest == false || tbe.DemandAddress != address)) { + DPRINTF(RubySlicc, "trying to evict address %s (base: %s, offset: %d)\n", address, getRegionBase(address), getRegionOffset(address)); + DPRINTF(RubySlicc, "tbe valid blocks %s\n", tbe.ValidBlocks); + + enqueue(probeNetwork_out, NBProbeRequestMsg, 1) { + out_msg.addr := address; + out_msg.Type := tbe.MsgType; + out_msg.ReturnData := true; + if (address == tbe.DemandAddress) { + out_msg.DemandRequest := true; + } + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.add(getPeer(machineID,address)); + DPRINTF(RubySlicc, "%s\n", out_msg); + } + APPEND_TRANSITION_COMMENT(" current "); + APPEND_TRANSITION_COMMENT(tbe.ValidBlocks.at(getRegionOffset(address))); + tbe.AllAcksReceived := false; + } else { + DPRINTF(RubySlicc, "Not evicting demand %s\n", address); + } + } + + action(ed_evictDemand, "ed", desc="Evict the demand request if it's valid") { + if (noTCCdir && tbe.MsgType == ProbeRequestType:PrbDowngrade && isOnGPU()) { + tbe.OutstandingAcks := 0; + tbe.AllAcksReceived := true; + tbe.DoneEvicting := true; + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.Type := TriggerType:AcksComplete; + out_msg.addr := getRegionBase(address); + } + } else if (tbe.DemandRequest) { + enqueue(probeNetwork_out, NBProbeRequestMsg, 1) { + out_msg.addr := tbe.DemandAddress; + out_msg.Type := tbe.MsgType; + out_msg.ReturnData := true; + out_msg.DemandRequest := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.add(getPeer(machineID,address)); + DPRINTF(RubySlicc, "%s\n", out_msg); + 
tbe.AllAcksReceived := false; + } + if (tbe.ValidBlocks.at(getRegionOffset(tbe.DemandAddress)) == false) { + tbe.OutstandingAcks := tbe.OutstandingAcks + 1; + } + APPEND_TRANSITION_COMMENT("Evicting demand "); + APPEND_TRANSITION_COMMENT(tbe.DemandAddress); + } + APPEND_TRANSITION_COMMENT("waiting acks "); + APPEND_TRANSITION_COMMENT(tbe.OutstandingAcks); + } + + action(adp_AckDemandProbe, "fp", desc="forward demand probe even if we know that the core is invalid") { + peek(probeNetwork_in, NBProbeRequestMsg) { + if (in_msg.DemandRequest) { + enqueue(responseNetwork_out, ResponseMsg, toDirLatency) { + out_msg.addr := in_msg.DemandAddress; + out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes + out_msg.Sender := getPeer(machineID,address); + out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + out_msg.Dirty := false; // only true if sending back data i think + out_msg.Hit := false; + out_msg.Ntsl := false; + out_msg.State := CoherenceState:NA; + out_msg.NoAckNeeded := true; + out_msg.MessageSize := MessageSizeType:Response_Control; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } + } + + action(en_enqueueNextEvict, "en", desc="Queue evict the next block in the region") { + // increment in_msg.addr by blockSize bytes and enqueue on triggerPort + // Only enqueue if the next address doesn't overrun the region bound + if (getRegionBase(getNextBlock(address)) == getRegionBase(address)) { + enqueue(triggerQueue_out, TriggerMsg, nextEvictLatency) { + out_msg.Type := TriggerType:InvNext; + out_msg.addr := getNextBlock(address); + } + } else { + tbe.DoneEvicting := true; + DPRINTF(RubySlicc, "Done evicing region %s\n", getRegionBase(address)); + DPRINTF(RubySlicc, "Waiting for %s acks\n", tbe.OutstandingAcks); + if (tbe.AllAcksReceived == true) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.Type := TriggerType:AcksComplete; + out_msg.addr := 
getRegionBase(address); + } + } + } + } + + action(ef_enqueueFirstEvict, "ef", desc="Queue the first block in the region to be evicted") { + if (tbe.DoneEvicting == false) { + enqueue(triggerQueue_out, TriggerMsg, nextEvictLatency) { + out_msg.Type := TriggerType:InvNext; + out_msg.addr := getRegionBase(address); + } + } + } + + action(ra_receiveAck, "ra", desc="Mark TBE entry as received this ack") { + DPRINTF(RubySlicc, "received ack for %s reg: %s vec: %s pos: %d\n", + address, getRegionBase(address), tbe.ValidBlocks, getRegionOffset(address)); + peek(unblockNetwork_in, UnblockMsg) { + // + // Note the tbe ValidBlock vec will be a conservative list of the + // valid blocks since the cache entry ValidBlock vec is set on the + // request + // + if (in_msg.wasValid) { + assert(tbe.ValidBlocks.at(getRegionOffset(address))); + } + } + tbe.OutstandingAcks := tbe.OutstandingAcks - 1; + tbe.AcksReceived.at(getRegionOffset(address)) := true; + assert(tbe.OutstandingAcks >= 0); + if (tbe.OutstandingAcks == 0) { + tbe.AllAcksReceived := true; + if (tbe.DoneEvicting) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.Type := TriggerType:AcksComplete; + out_msg.addr := getRegionBase(address); + } + } + } + + APPEND_TRANSITION_COMMENT(getRegionBase(address)); + APPEND_TRANSITION_COMMENT(" Acks left receive "); + APPEND_TRANSITION_COMMENT(tbe.OutstandingAcks); + } + + action(do_decrementOutstanding, "do", desc="Decrement outstanding requests") { + APPEND_TRANSITION_COMMENT(" decr outstanding "); + if (is_valid(cache_entry)) { + cache_entry.NumOutstandingReqs := cache_entry.NumOutstandingReqs - 1; + assert(cache_entry.OutstandingReqs.at(getRegionOffset(address))); + cache_entry.OutstandingReqs.at(getRegionOffset(address)) := false; + assert(cache_entry.NumOutstandingReqs >= 0); + assert(cache_entry.NumOutstandingReqs == countBoolVec(cache_entry.OutstandingReqs)); + APPEND_TRANSITION_COMMENT(cache_entry.NumOutstandingReqs); + } + if (is_valid(tbe)) { + 
tbe.NumOutstandingReqs := tbe.NumOutstandingReqs - 1; + assert(tbe.OutstandingReqs.at(getRegionOffset(address))); + tbe.OutstandingReqs.at(getRegionOffset(address)) := false; + assert(tbe.NumOutstandingReqs >= 0); + assert(tbe.NumOutstandingReqs == countBoolVec(tbe.OutstandingReqs)); + APPEND_TRANSITION_COMMENT(tbe.NumOutstandingReqs); + } + } + + action(co_checkOutstanding, "co", desc="check if there are no more outstanding requests") { + assert(is_valid(tbe)); + if ((tbe.NumOutstandingReqs <= tbe.OutstandingThreshold) && + (tbe.AllOutstandingTriggered == false)) { + APPEND_TRANSITION_COMMENT(" no more outstanding: "); + APPEND_TRANSITION_COMMENT(tbe.NumOutstandingReqs); + APPEND_TRANSITION_COMMENT(tbe.OutstandingThreshold); + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.Type := TriggerType:AllOutstanding; + if (tbe.DemandRequest) { + out_msg.addr := tbe.DemandAddress; + } else { + out_msg.addr := getRegionBase(address); + } + DPRINTF(RubySlicc, "co enqueuing %s\n", out_msg); + tbe.AllOutstandingTriggered := true; + } + } else { + APPEND_TRANSITION_COMMENT(" still more outstanding "); + } + } + + action(ro_resetAllOutstanding, "ro", desc="Reset all outstanding") { + tbe.AllOutstandingTriggered := false; + } + + action(so_setOutstandingCheckOne, "so", desc="Check outstanding is waiting for 1, not 0") { + // Need this for S_P because one request is outstanding between here and r-dir + tbe.OutstandingThreshold := 1; + } + + action(a_allocateRegionEntry, "a", desc="Allocate a new entry") { + set_cache_entry(cacheMemory.allocate(getRegionBase(address), new Entry)); + cache_entry.ValidBlocks.clear(); + cache_entry.ValidBlocks.resize(blocksPerRegion); + cache_entry.UsedBlocks.clear(); + cache_entry.UsedBlocks.resize(blocksPerRegion); + cache_entry.dirty := false; + cache_entry.NumOutstandingReqs := 0; + cache_entry.OutstandingReqs.clear(); + cache_entry.OutstandingReqs.resize(blocksPerRegion); + } + + action(d_deallocateRegionEntry, "d", desc="Deallocate region 
entry") { + cacheMemory.deallocate(getRegionBase(address)); + unset_cache_entry(); + } + + action(t_allocateTBE, "t", desc="allocate TBE Entry") { + check_allocate(TBEs); + TBEs.allocate(getRegionBase(address)); + set_tbe(getTBE(address)); + tbe.OutstandingAcks := 0; + tbe.AllAcksReceived := true; // starts true since the region could be empty + tbe.DoneEvicting := false; + tbe.AcksReceived.clear(); + tbe.AcksReceived.resize(blocksPerRegion); + tbe.SendAck := false; + tbe.OutstandingThreshold := 0; + if (is_valid(cache_entry)) { + tbe.NumOutstandingReqs := cache_entry.NumOutstandingReqs; + tbe.OutstandingReqs := cache_entry.OutstandingReqs; + assert(tbe.NumOutstandingReqs == countBoolVec(tbe.OutstandingReqs)); + tbe.dirty := cache_entry.dirty; + tbe.ValidBlocks := cache_entry.ValidBlocks; + tbe.OutstandingAcks := countBoolVec(tbe.ValidBlocks); + APPEND_TRANSITION_COMMENT(" tbe valid blocks "); + APPEND_TRANSITION_COMMENT(tbe.ValidBlocks); + APPEND_TRANSITION_COMMENT(" cache valid blocks "); + APPEND_TRANSITION_COMMENT(cache_entry.ValidBlocks); + } else { + tbe.dirty := false; + } + } + + action(m_markSendAck, "m", desc="Mark TBE that we need to ack at end") { + assert(is_valid(tbe)); + tbe.SendAck := true; + } + + action(db_markDirtyBit, "db", desc="Mark TBE dirty bit") { + peek(unblockNetwork_in, UnblockMsg) { + if (is_valid(tbe)) { + tbe.dirty := tbe.dirty || in_msg.Dirty; + } + } + } + + action(dr_markDoneAckReceived, "dr", desc="Mark TBE that a done ack has been received") { + assert(is_valid(tbe)); + tbe.DoneAckReceived := true; + tbe.DoneAckAddr := address; + APPEND_TRANSITION_COMMENT(" marking done ack on TBE "); + } + + action(se_setTBE, "se", desc="Set msg type to evict") { + peek(probeNetwork_in, NBProbeRequestMsg) { + tbe.MsgType := in_msg.Type; + tbe.Requestor := in_msg.Requestor; + tbe.DemandAddress := in_msg.DemandAddress; + tbe.DemandRequest := in_msg.DemandRequest; + } + } + + action(sne_setNewTBE, "sne", desc="Set msg type to evict") { + 
peek(probeNetwork_in, NBProbeRequestMsg) { + tbe.NewMsgType := in_msg.Type; + tbe.NewRequestor := in_msg.Requestor; + tbe.NewDemandAddress := in_msg.DemandAddress; + tbe.NewDemandRequest := in_msg.DemandRequest; + } + } + + action(soe_setOldTBE, "soe", desc="Set msg type to evict") { + tbe.MsgType := tbe.NewMsgType; + tbe.Requestor := tbe.NewRequestor; + tbe.DemandAddress := tbe.NewDemandAddress; + tbe.DemandRequest := tbe.NewDemandRequest; + tbe.OutstandingAcks := countBoolVec(tbe.ValidBlocks); + tbe.AllAcksReceived := true; // starts true since the region could be empty + tbe.DoneEvicting := false; + tbe.AcksReceived.clear(); + tbe.AcksReceived.resize(blocksPerRegion); + tbe.SendAck := false; + } + + action(ser_setTBE, "ser", desc="Set msg type to evict repl") { + tbe.MsgType := ProbeRequestType:PrbInv; + } + + action(md_setMustDowngrade, "md", desc="When permissions finally get here, must be shared") { + assert(is_valid(cache_entry)); + cache_entry.MustDowngrade := true; + } + + action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") { + TBEs.deallocate(getRegionBase(address)); + unset_tbe(); + } + + action(p_popRequestQueue, "p", desc="Pop the request queue") { + requestNetwork_in.dequeue(clockEdge()); + } + + action(pl_popUnblockQueue, "pl", desc="Pop the unblock queue") { + unblockNetwork_in.dequeue(clockEdge()); + } + + action(pn_popNotifyQueue, "pn", desc="Pop the notify queue") { + notifyNetwork_in.dequeue(clockEdge()); + } + + action(pp_popProbeQueue, "pp", desc="Pop the probe queue") { + probeNetwork_in.dequeue(clockEdge()); + } + + action(pt_popTriggerQueue, "pt", desc="Pop the trigger queue") { + DPRINTF(RubySlicc, "Trigger Before Contents: %s\n", triggerQueue_in); + triggerQueue_in.dequeue(clockEdge()); + DPRINTF(RubySlicc, "Trigger After Contents: %s\n", triggerQueue_in); + } + + // Must always use wake all, since non-region address wait on region addresses + action(wa_wakeUpAllDependents, "wa", desc="Wake up any requests waiting for this 
region") { + wakeUpAllBuffers(); + } + + action(zz_stallAndWaitRequestQueue, "\z", desc="recycle request queue") { + Addr regAddr := getRegionBase(address); + DPRINTF(RubySlicc, "Stalling address %s\n", regAddr); + stall_and_wait(requestNetwork_in, regAddr); + } + + action(yy_stallAndWaitProbeQueue, "\y", desc="stall probe queue") { + Addr regAddr := getRegionBase(address); + stall_and_wait(probeNetwork_in, regAddr); + } + + action(yyy_recycleProbeQueue, "\yy", desc="recycle probe queue") { + probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + action(zzz_recycleRequestQueue, "\zz", desc="recycle request queue") { + requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + action(www_recycleUnblockNetwork, "\ww", desc="recycle unblock queue") { + unblockNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + action(z_stall, "z", desc="stall request queue") { + // fake state + } + + action(mru_setMRU, "mru", desc="set MRU") { + cacheMemory.setMRU(address, cache_entry.NumValidBlocks); + } + + // Transitions + + transition({NP_PS, S_P, S_NP_PS, P_NP, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, P_NP_W, P_NP_NP, NP_W}, {CPURead, CPUWriteback, CPUWrite}) {} { + zz_stallAndWaitRequestQueue; + } + + transition(SS_P, {CPURead, CPUWriteback}) { + zz_stallAndWaitRequestQueue; + } + + transition({NP, S, P, NP_PS, S_P, S_NP_PS, P_NP, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, SS_P, NP_W, P_NP_NP}, StallAccess) {} { + zz_stallAndWaitRequestQueue; + } + + transition({S, P, NP_PS, S_P, S_NP_PS, P_NP, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, SS_P, P_NP_W, P_NP_NP, NP_W}, StallDoneAck) { + www_recycleUnblockNetwork; + } + + transition(NP, StallDoneAck, NP_W) { + t_allocateTBE; + db_markDirtyBit; + dr_markDoneAckReceived; + pl_popUnblockQueue; + } + + transition(NP_W, StaleRequest, NP) { + f_fwdReqToDir; + dt_deallocateTBE; + wa_wakeUpAllDependents; + p_popRequestQueue; + } + + transition(P_NP_O, DowngradeRegion) {} { + z_stall; // should 
stall and wait + } + + transition({NP_PS, S_NP_PS, S_P, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, SS_P}, ReplRegion) {} { + zz_stallAndWaitRequestQueue; // can't let things get out of order! + } + + transition({P_NP_O, S_O, SS_P}, InvRegion) {} { + yyy_recycleProbeQueue; // can't be z_stall because there could be a RdBlkM in the requestQueue which has the sinked flag which is blocking the inv + } + + transition(P_NP, {InvRegion, DowngradeRegion}, P_NP_NP) {} { + sne_setNewTBE; + pp_popProbeQueue; + } + + transition(S_P, DowngradeRegion) {} { + adp_AckDemandProbe; + ain_ackRegionInvNow; + pp_popProbeQueue; + } + + transition(P_NP_W, InvRegion) { + adp_AckDemandProbe; + ain_ackRegionInvNow; + pp_popProbeQueue; + } + + transition(P_NP_W, DowngradeRegion) { + adp_AckDemandProbe; + aine_ackRegionInvExlusiveNow; + pp_popProbeQueue; + } + + transition({P, S}, {CPURead, CPUWriteback}) {TagArrayRead, TagArrayWrite} { + mru_setMRU; + f_fwdReqToDir; + u_updateRegionEntry; + p_popRequestQueue; + } + + transition(P, CPUWrite) {TagArrayRead, TagArrayWrite} { + mru_setMRU; + f_fwdReqToDir; + u_updateRegionEntry; + p_popRequestQueue; + } + + transition(S, CPUWrite, S_O) {TagArrayRead} { + mru_setMRU; + t_allocateTBE; + co_checkOutstanding; + zz_stallAndWaitRequestQueue; + } + + transition(S_O, AllOutstanding, SS_P) { + wa_wakeUpAllDependents; + ro_resetAllOutstanding; + pt_popTriggerQueue; + } + + transition(SS_P, CPUWrite, S_P) { + mru_setMRU; + dt_deallocateTBE; + ru_requestUpgrade; + u_updateRegionEntry; + p_popRequestQueue; + } + + transition(NP, {CPURead, CPUWriteback}, NP_PS) {TagArrayRead, TagArrayWrite} { + a_allocateRegionEntry; + rs_requestShared; + u_updateRegionEntry; + p_popRequestQueue;//zz_stallAndWaitRequestQueue; + } + + transition(NP, CPUWrite, NP_PS) {TagArrayRead, TagArrayWrite} { + a_allocateRegionEntry; + rp_requestPrivate; + u_updateRegionEntry; + p_popRequestQueue;//zz_stallAndWaitRequestQueue; + } + + transition(NP_PS, PrivateNotify, P) {} { + 
ap_ackPrivateNotify; + wa_wakeUpAllDependents; + pn_popNotifyQueue; + } + + transition(S_P, PrivateNotify, P) {} { + ap_ackPrivateNotify; + wa_wakeUpAllDependents; + pn_popNotifyQueue; + } + + transition(NP_PS, SharedNotify, S) {} { + ap_ackPrivateNotify; + wa_wakeUpAllDependents; + pn_popNotifyQueue; + } + + transition(P_NP_W, WbNotify, NP) {} { + aw_ackWbNotify; + wa_wakeUpAllDependents; + dt_deallocateTBE; + pn_popNotifyQueue; + } + + transition({P, S}, ReplRegion, P_NP_O) {TagArrayRead, TagArrayWrite} { + t_allocateTBE; + ser_setTBE; + d_deallocateRegionEntry; + co_checkOutstanding; + } + + transition({P, S}, InvRegion, P_NP_O) {TagArrayRead, TagArrayWrite} { + t_allocateTBE; + se_setTBE; + m_markSendAck; + d_deallocateRegionEntry; + co_checkOutstanding; + pp_popProbeQueue; + } + + transition(P_NP_O, AllOutstanding, P_NP) {} { + ed_evictDemand; + ef_enqueueFirstEvict; + ro_resetAllOutstanding; + pt_popTriggerQueue; + } + + transition(S_P, InvRegion, S_NP_PS_O) {TagArrayRead} { + t_allocateTBE; + se_setTBE; + m_markSendAck; + so_setOutstandingCheckOne; + co_checkOutstanding; + pp_popProbeQueue; + } + + transition(S_NP_PS_O, AllOutstanding, S_NP_PS) { + ed_evictDemand; + ef_enqueueFirstEvict; + ro_resetAllOutstanding; + pt_popTriggerQueue; + } + + transition(P, DowngradeRegion, P_S_O) {TagArrayRead, TagArrayWrite} { + t_allocateTBE; + se_setTBE; + m_markSendAck; + co_checkOutstanding; + pp_popProbeQueue; + } + + transition(P_S_O, AllOutstanding, P_S) {} { + ed_evictDemand; + ef_enqueueFirstEvict; + ro_resetAllOutstanding; + pt_popTriggerQueue; + } + + transition({P, S}, DoneAck) {TagArrayWrite} { + do_decrementOutstanding; + wa_wakeUpAllDependents; + db_markDirtyBit; + uw_updatePossibleWriteback; + pl_popUnblockQueue; + } + + transition({S_P, NP_PS, S_NP_PS}, DoneAck) {TagArrayWrite} { + www_recycleUnblockNetwork; + } + + transition({P_NP_O, S_NP_PS_O, P_S_O, S_O}, DoneAck) {} { + do_decrementOutstanding; + co_checkOutstanding; + db_markDirtyBit; + 
uw_updatePossibleWriteback; + pl_popUnblockQueue; + } + + transition({P_NP, P_S, S_NP_PS, P_NP_NP}, Evict) {} { + e_evictCurrent; + en_enqueueNextEvict; + pt_popTriggerQueue; + } + + transition({P_NP, P_S, S_NP_PS, P_NP_NP}, InvAck) {} { + ra_receiveAck; + db_markDirtyBit; + pl_popUnblockQueue; + } + + transition(P_NP, LastAck_CleanWb, P_NP_W) {} { + rw_requestWriteback; + pt_popTriggerQueue; + } + + transition(P_NP_NP, LastAck_CleanWb, P_NP) {} { + soe_setOldTBE; + m_markSendAck; + ed_evictDemand; + ef_enqueueFirstEvict; + pt_popTriggerQueue; + } + + transition(P_NP, LastAck_PrbResp, NP) {} { + aie_ackRegionExclusiveInv; + dt_deallocateTBE; + wa_wakeUpAllDependents; + pt_popTriggerQueue; + } + + transition(S_NP_PS, LastAck_PrbResp, NP_PS) {} { + aie_ackRegionExclusiveInv; + dt_deallocateTBE; + wa_wakeUpAllDependents; + pt_popTriggerQueue; + } + + transition(P_S, LastAck_PrbResp, S) {} { + ai_ackRegionInv; + ad_ackDircetory; + dt_deallocateTBE; + wa_wakeUpAllDependents; + pt_popTriggerQueue; + } + +} + diff --git a/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm b/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm new file mode 100644 index 000000000..b392311c5 --- /dev/null +++ b/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm @@ -0,0 +1,1187 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Jason Power + */ + +machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol") +: CacheMemory *cacheMemory; // stores only region addresses. 
Must set block size same as below + NodeID cpuRegionBufferNum; + NodeID gpuRegionBufferNum; + int blocksPerRegion := 64; // 4k regions + Cycles toDirLatency := 10; // Latency to fwd requests and send invs to directory + bool always_migrate := "False"; + bool sym_migrate := "False"; + bool asym_migrate := "False"; + bool noTCCdir := "False"; + int TCC_select_num_bits := 1; + + // To the directory + MessageBuffer * requestToDir, network="To", virtual_network="5", vnet_type="request"; + + // To the region buffers + MessageBuffer * notifyToRBuffer, network="To", virtual_network="7", vnet_type="request"; + MessageBuffer * probeToRBuffer, network="To", virtual_network="8", vnet_type="request"; + + // From the region buffers + MessageBuffer * responseFromRBuffer, network="From", virtual_network="2", vnet_type="response"; + MessageBuffer * requestFromRegBuf, network="From", virtual_network="0", vnet_type="request"; + + MessageBuffer * triggerQueue; +{ + + // States + state_declaration(State, desc="Region states", default="RegionDir_State_NP") { + NP, AccessPermission:Invalid, desc="Not present in region directory"; + P, AccessPermission:Invalid, desc="Region is private to owner"; + S, AccessPermission:Invalid, desc="Region is shared between CPU and GPU"; + + P_NP, AccessPermission:Invalid, desc="Evicting the region"; + NP_P, AccessPermission:Invalid, desc="Must wait for ack from R-buf"; + NP_S, AccessPermission:Invalid, desc="Must wait for ack from R-buf"; + P_P, AccessPermission:Invalid, desc="Waiting for ack from R-buf"; + S_S, AccessPermission:Invalid, desc="Waiting for ack from R-buf"; + P_S, AccessPermission:Invalid, desc="Downgrading the region"; + S_P, AccessPermission:Invalid, desc="Upgrading the region"; + P_AS, AccessPermission:Invalid, desc="Sent invalidates, waiting for acks"; + S_AP, AccessPermission:Invalid, desc="Sent invalidates, waiting for acks"; + P_AP, AccessPermission:Invalid, desc="Sent invalidates, waiting for acks"; + + SP_NP_W, 
AccessPermission:Invalid, desc="Last sharer writing back, waiting for ack"; + S_W, AccessPermission:Invalid, desc="Sharer writing back, waiting for ack"; + + P_AP_W, AccessPermission:Invalid, desc="Fwded request to dir, waiting for ack"; + P_AS_W, AccessPermission:Invalid, desc="Fwded request to dir, waiting for ack"; + S_AP_W, AccessPermission:Invalid, desc="Fwded request to dir, waiting for ack"; + } + + enumeration(Event, desc="Region directory events") { + SendInv, desc="Send inv message to any machine that has a region buffer"; + SendUpgrade, desc="Send upgrade message to any machine that has a region buffer"; + SendDowngrade, desc="Send downgrade message to any machine that has a region buffer"; + + Evict, desc="Evict this region"; + + UpgradeRequest, desc="Request from r-buf for an upgrade"; + SharedRequest, desc="Request from r-buf for read"; + PrivateRequest, desc="Request from r-buf for write"; + + InvAckCore, desc="Ack from region buffer to order the invalidate"; + InvAckCoreNoShare, desc="Ack from region buffer to order the invalidate, and it does not have the region"; + CPUPrivateAck, desc="Ack from region buffer to order private notification"; + + LastAck, desc="Done eviciting all the blocks"; + + StaleCleanWbRequest, desc="stale clean writeback reqeust"; + StaleCleanWbRequestNoShare, desc="stale clean wb req from a cache which should be removed from sharers"; + CleanWbRequest, desc="clean writeback reqeust, multiple sharers"; + CleanWbRequest_LastSharer, desc="clean writeback reqeust, last sharer"; + WritebackAck, desc="Writeback Ack from region buffer"; + DirReadyAck, desc="Directory is ready, waiting Ack from region buffer"; + + TriggerInv, desc="trigger invalidate message"; + TriggerDowngrade, desc="trigger downgrade message"; + } + + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { + DataArrayRead, desc="Read the data array"; + DataArrayWrite, desc="Write the data array"; + TagArrayRead, desc="Read the data 
array"; + TagArrayWrite, desc="Write the data array"; + } + + structure(BoolVec, external="yes") { + bool at(int); + void resize(int); + void clear(); + } + + structure(Entry, desc="Region entry", interface="AbstractCacheEntry") { + Addr addr, desc="Base address of this region"; + NetDest Sharers, desc="Set of machines that are sharing, but not owners"; + State RegionState, desc="Region state"; + DataBlock DataBlk, desc="Data for the block (always empty in region dir)"; + MachineID Owner, desc="Machine which owns all blocks in this region"; + Cycles ProbeStart, desc="Time when the first probe request was issued"; + bool LastWriten, default="false", desc="The last time someone accessed this region, it wrote it"; + bool LastWritenByCpu, default="false", desc="The last time the CPU accessed this region, it wrote it"; + bool LastWritenByGpu, default="false", desc="The last time the GPU accessed this region, it wrote it"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + MachineID Owner, desc="Machine which owns all blocks in this region"; + NetDest Sharers, desc="Set of machines to send evicts"; + int NumValidBlocks, desc="Number of blocks valid so we don't have to count a BoolVec"; + bool AllAcksReceived, desc="Got all necessary acks from dir"; + CoherenceRequestType MsgType, desc="Msg type for the evicts could be inv or dwngrd"; + Cycles ProbeRequestTime, default="Cycles(0)", desc="Start of probe request"; + Cycles InitialRequestTime, default="Cycles(0)", desc="To forward back on out msg"; + Addr DemandAddress, desc="Demand address from original request"; + uint64_t probe_id, desc="probe id for lifetime profiling"; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + // Stores only region addresses + TBETable TBEs, template="<RegionDir_TBE>", constructor="m_number_of_TBEs"; + int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + + 
Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  // The declarations from clockEdge() through curCycle() are prototypes only;
+  // no bodies for them appear in this part of the file.
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  // Address-geometry members. Each takes its value from the expression given
+  // in its default= string; regionBits is log2 of the blocksPerRegion
+  // parameter.
+  int blockBits, default="RubySystem::getBlockSizeBits()";
+  int blockBytes, default="RubySystem::getBlockSizeBytes()";
+  int regionBits, default="log2(m_blocksPerRegion)";
+
+  // Functions
+
+  // Translates a region buffer's MachineID into a cache-side MachineID:
+  //  - node ID == cpuRegionBufferNum -> CorePair 0
+  //  - node ID == gpuRegionBufferNum -> the TCC chosen by mapAddressToRange
+  //    for `address` when noTCCdir is set, otherwise TCCdir 0
+  // Any other node ID is reported through error().
+  MachineID getCoreMachine(MachineID rBuf, Addr address) {
+    if (machineIDToNodeID(rBuf) == cpuRegionBufferNum) {
+      return createMachineID(MachineType:CorePair, intToID(0));
+    } else if (machineIDToNodeID(rBuf) == gpuRegionBufferNum) {
+      if (noTCCdir) {
+        return mapAddressToRange(address,MachineType:TCC,
+                                 TCC_select_low_bit, TCC_select_num_bits);
+      } else {
+        return createMachineID(MachineType:TCCdir, intToID(0));
+      }
+    } else {
+      error("Unexpected region buffer number");
+    }
+  }
+
+  // Returns true when rBuf's node ID is cpuRegionBufferNum and false when it
+  // is gpuRegionBufferNum; any other node ID is reported through error().
+  bool isCpuMachine(MachineID rBuf) {
+    if (machineIDToNodeID(rBuf) == cpuRegionBufferNum) {
+      return true;
+    } else if (machineIDToNodeID(rBuf) == gpuRegionBufferNum) {
+      return false;
+    } else {
+      error("Unexpected region buffer number");
+    }
+  }
+
+  // Symmetric-migration predicate: simply the entry's LastWriten flag.
+  bool symMigrate(Entry cache_entry) {
+    return cache_entry.LastWriten;
+  }
+
+  // Asymmetric-migration predicate: picks the per-side flag that matches the
+  // requestor (LastWritenByCpu for the CPU region buffer, LastWritenByGpu
+  // otherwise, as decided by isCpuMachine).
+  bool asymMigrate(Entry cache_entry, MachineID requestor) {
+    if (isCpuMachine(requestor)) {
+      return cache_entry.LastWritenByCpu;
+    } else {
+      return cache_entry.LastWritenByGpu;
+    }
+  }
+
+  // Returns the index of addr's block inside its region, taken from the
+  // address bits blockBits .. regionBits+blockBits-1 as passed to bitSelect,
+  // and asserts it is below blocksPerRegion. Returns 0 when blocksPerRegion
+  // is not greater than 1.
+  int getRegionOffset(Addr addr) {
+    if (blocksPerRegion > 1) {
+      Addr offset := bitSelect(addr, blockBits, regionBits+blockBits-1);
+      int ret := addressToInt(offset);
+      assert(ret < blocksPerRegion);
+      return ret;
+    } else {
+      return 0;
+    }
+  }
+
+  // Returns the region address for addr by applying maskLowOrderBits over
+  // blockBits+regionBits bits (presumably clearing them -- confirm against
+  // the helper's definition).
+  Addr getRegionBase(Addr addr) {
+    return maskLowOrderBits(addr, blockBits+regionBits);
+  }
+
+  // Returns a copy of addr after makeNextStrideAddress(a, 1); the argument
+  // itself is not modified. NOTE(review): presumably this advances by one
+  // block -- verify the stride unit.
+  Addr getNextBlock(Addr addr) {
+    Addr a := addr;
+    makeNextStrideAddress(a, 1);
+    return a;
+  }
+
+  // True when addr's region base is already tagged in cacheMemory or
+  // cacheMemory.cacheAvail() accepts it; both results are also logged.
+  bool presentOrAvail(Addr addr) {
+    DPRINTF(RubySlicc, "Present? %s, avail? %s\n", cacheMemory.isTagPresent(getRegionBase(addr)), cacheMemory.cacheAvail(getRegionBase(addr)));
+    return cacheMemory.isTagPresent(getRegionBase(addr)) || cacheMemory.cacheAvail(getRegionBase(addr));
+  }
+
+  // Returns a region entry!
+  // The lookup key is the region base of addr, so any address inside a region
+  // resolves to the same entry. The result is cast to an Entry pointer.
+  Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", cacheMemory.lookup(getRegionBase(addr)));
+  }
+
+  // Looks up the TBE keyed by the region base of addr.
+  TBE getTBE(Addr addr), return_by_pointer="yes" {
+    return TBEs.lookup(getRegionBase(addr));
+  }
+
+  // Returns the DataBlk member of the region entry for addr. The region base
+  // is computed here and again inside getCacheEntry. There is no validity
+  // check on the looked-up entry before DataBlk is read.
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    return getCacheEntry(getRegionBase(addr)).DataBlk;
+  }
+
+  // State lookup order: a valid TBE wins, then a valid cache entry, otherwise
+  // NP. The addr argument is not used in the body.
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.RegionState;
+    }
+    return State:NP;
+  }
+
+  // Writes the new state into the TBE and into the cache entry, each only if
+  // valid (both are updated when both exist). The addr argument is not used
+  // in the body.
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+    if (is_valid(cache_entry)) {
+      cache_entry.RegionState := state;
+    }
+  }
+
+  // Permission for addr, derived from the TBE state if a TBE exists, else
+  // from the cache entry's RegionState, else NotPresent.
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := getTBE(addr);
+    if(is_valid(tbe)) {
+      return RegionDir_State_to_permission(tbe.TBEState);
+    }
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return RegionDir_State_to_permission(cache_entry.RegionState);
+    }
+    return AccessPermission:NotPresent;
+  }
+
+  // Applies the permission mapped from `state` to the cache entry when it is
+  // valid; does nothing otherwise. The addr argument is not used in the body.
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(RegionDir_State_to_permission(state));
+    }
+  }
+
+  // Functional read: delegates to functionalMemoryRead(pkt); addr is unused.
+  void functionalRead(Addr addr, Packet *pkt) {
+    functionalMemoryRead(pkt);
+  }
+
+  // Functional write: returns 1 when functionalMemoryWrite(pkt) reports
+  // success and 0 otherwise; addr is unused.
+  int functionalWrite(Addr addr, Packet *pkt) {
+    if (functionalMemoryWrite(pkt)) {
+      return 1;
+    } else {
+      return 0;
+    }
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      cacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+ 
cacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:TagArrayRead) { + cacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:TagArrayWrite) { + cacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + return cacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:DataArrayWrite) { + return cacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:TagArrayRead) { + return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:TagArrayWrite) { + return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } + } + + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + + out_port(requestNetwork_out, CPURequestMsg, requestToDir); + out_port(notifyNetwork_out, CPURequestMsg, notifyToRBuffer); + out_port(probeNetwork_out, NBProbeRequestMsg, probeToRBuffer); + + in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=2) { + if (triggerQueue_in.isReady(clockEdge())) { + peek(triggerQueue_in, TriggerMsg) { + assert(in_msg.addr == getRegionBase(in_msg.addr)); + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := getTBE(in_msg.addr); + DPRINTF(RubySlicc, "trigger msg: %s (%s)\n", in_msg, getRegionBase(in_msg.addr)); + if (in_msg.Type == TriggerType:AcksComplete) { + assert(is_valid(tbe)); + trigger(Event:LastAck, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == TriggerType:InvRegion) { + assert(is_valid(tbe)); + trigger(Event:TriggerInv, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == TriggerType:DowngradeRegion) { + 
assert(is_valid(tbe)); + trigger(Event:TriggerDowngrade, in_msg.addr, cache_entry, tbe); + } else { + error("Unknown trigger message"); + } + } + } + } + + in_port(responseNetwork_in, ResponseMsg, responseFromRBuffer, rank=1) { + if (responseNetwork_in.isReady(clockEdge())) { + peek(responseNetwork_in, ResponseMsg) { + TBE tbe := getTBE(in_msg.addr); + Entry cache_entry := getCacheEntry(in_msg.addr); + if (in_msg.Type == CoherenceResponseType:CPUPrbResp) { + assert(in_msg.addr == getRegionBase(in_msg.addr)); + assert(is_valid(tbe)); + if (in_msg.NotCached) { + trigger(Event:InvAckCoreNoShare, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:InvAckCore, in_msg.addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceResponseType:PrivateAck) { + assert(in_msg.addr == getRegionBase(in_msg.addr)); + assert(is_valid(cache_entry)); + //Fix Me...add back in: assert(cache_entry.Sharers.isElement(in_msg.Sender)); + trigger(Event:CPUPrivateAck, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceResponseType:RegionWbAck) { + //Fix Me...add back in: assert(cache_entry.Sharers.isElement(in_msg.Sender) == false); + assert(in_msg.addr == getRegionBase(in_msg.addr)); + trigger(Event:WritebackAck, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceResponseType:DirReadyAck) { + assert(is_valid(tbe)); + trigger(Event:DirReadyAck, getRegionBase(in_msg.addr), cache_entry, tbe); + } else { + error("Invalid response type"); + } + } + } + } + + // In from cores + // NOTE: We get the cache / TBE entry based on the region address, + // but pass the block address to the actions + in_port(requestNetwork_in, CPURequestMsg, requestFromRegBuf, rank=0) { + if (requestNetwork_in.isReady(clockEdge())) { + peek(requestNetwork_in, CPURequestMsg) { + //assert(in_msg.addr == getRegionBase(in_msg.addr)); + Addr address := getRegionBase(in_msg.addr); + DPRINTF(RubySlicc, "Got %s, base %s\n", in_msg.addr, address); + if (presentOrAvail(address)) { + 
TBE tbe := getTBE(address); + Entry cache_entry := getCacheEntry(address); + if (in_msg.Type == CoherenceRequestType:PrivateRequest) { + if (is_valid(cache_entry) && (cache_entry.Owner != in_msg.Requestor || + getState(tbe, cache_entry, address) == State:S)) { + trigger(Event:SendInv, address, cache_entry, tbe); + } else { + trigger(Event:PrivateRequest, address, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:SharedRequest) { + if (is_invalid(cache_entry)) { + // If no one has ever requested this region give private permissions + trigger(Event:PrivateRequest, address, cache_entry, tbe); + } else { + if (always_migrate || + (sym_migrate && symMigrate(cache_entry)) || + (asym_migrate && asymMigrate(cache_entry, in_msg.Requestor))) { + if (cache_entry.Sharers.count() == 1 && + cache_entry.Sharers.isElement(in_msg.Requestor)) { + trigger(Event:UpgradeRequest, address, cache_entry, tbe); + } else { + trigger(Event:SendInv, address, cache_entry, tbe); + } + } else { // don't migrate + if(cache_entry.Sharers.isElement(in_msg.Requestor) || + getState(tbe, cache_entry, address) == State:S) { + trigger(Event:SharedRequest, address, cache_entry, tbe); + } else { + trigger(Event:SendDowngrade, address, cache_entry, tbe); + } + } + } + } else if (in_msg.Type == CoherenceRequestType:UpgradeRequest) { + if (is_invalid(cache_entry)) { + trigger(Event:PrivateRequest, address, cache_entry, tbe); + } else if (cache_entry.Sharers.count() == 1 && cache_entry.Sharers.isElement(in_msg.Requestor)) { + trigger(Event:UpgradeRequest, address, cache_entry, tbe); + } else { + trigger(Event:SendUpgrade, address, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:CleanWbRequest) { + if (is_invalid(cache_entry) || cache_entry.Sharers.isElement(in_msg.Requestor) == false) { + trigger(Event:StaleCleanWbRequest, address, cache_entry, tbe); + } else { + DPRINTF(RubySlicc, "wb address %s(%s) owner %s sharers %s requestor %s %d %d\n", in_msg.addr, 
getRegionBase(in_msg.addr), cache_entry.Owner, cache_entry.Sharers, in_msg.Requestor, cache_entry.Sharers.isElement(in_msg.Requestor), cache_entry.Sharers.count()); + if (cache_entry.Sharers.isElement(in_msg.Requestor) && cache_entry.Sharers.count() == 1) { + DPRINTF(RubySlicc, "last wb\n"); + trigger(Event:CleanWbRequest_LastSharer, address, cache_entry, tbe); + } else { + DPRINTF(RubySlicc, "clean wb\n"); + trigger(Event:CleanWbRequest, address, cache_entry, tbe); + } + } + } else { + error("unknown region dir request type"); + } + } else { + Addr victim := cacheMemory.cacheProbe(getRegionBase(in_msg.addr)); + TBE victim_tbe := getTBE(victim); + Entry victim_entry := getCacheEntry(victim); + DPRINTF(RubySlicc, "Evicting address %s for new region at address %s(%s)\n", victim, in_msg.addr, getRegionBase(in_msg.addr)); + assert(is_valid(victim_entry)); + trigger(Event:Evict, victim, victim_entry, victim_tbe); + } + } + } + } + + // Actions + + action(f_fwdReqToDir, "f", desc="Forward CPU request to directory") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) { + out_msg.addr := in_msg.addr; // This is the block address. 
"address" is the region address + out_msg.Type := in_msg.OriginalType; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Dirty := in_msg.Dirty; + out_msg.Requestor := getCoreMachine(in_msg.Requestor,address); + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.Destination.add(map_Address_to_Directory(in_msg.addr)); + out_msg.Shared := in_msg.Shared; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.Private := in_msg.Private; + out_msg.NoAckNeeded := true; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ProbeRequestStartTime := curCycle(); + out_msg.DemandRequest := true; + if (is_valid(cache_entry) && getState(tbe, cache_entry, address) != State:S) { + out_msg.Acks := cache_entry.Sharers.count(); + } else { + out_msg.Acks := 0; + } + } + } + } + + action(f_fwdReqToDirShared, "fs", desc="Forward CPU request to directory (shared)") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) { + out_msg.addr := in_msg.addr; // This is the block address. 
"address" is the region address + out_msg.Type := in_msg.OriginalType; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Dirty := in_msg.Dirty; + out_msg.Requestor := getCoreMachine(in_msg.Requestor,address); + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.Destination.add(map_Address_to_Directory(in_msg.addr)); + out_msg.Shared := in_msg.Shared; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.Private := in_msg.Private; + out_msg.NoAckNeeded := true; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ProbeRequestStartTime := curCycle(); + out_msg.DemandRequest := true; + out_msg.ForceShared := true; + if (is_valid(cache_entry) && getState(tbe, cache_entry, address) != State:S) { + out_msg.Acks := cache_entry.Sharers.count(); + } else { + out_msg.Acks := 0; + } + } + } + } + + action(f_fwdReqToDirWithAck, "fa", desc="Forward CPU request to directory with ack request") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) { + out_msg.addr := in_msg.addr; // This is the block address. "address" is the region address + out_msg.Type := in_msg.OriginalType; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Dirty := in_msg.Dirty; + out_msg.Requestor := getCoreMachine(in_msg.Requestor,address); + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.Destination.add(map_Address_to_Directory(in_msg.addr)); + out_msg.Shared := in_msg.Shared; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.Private := in_msg.Private; + out_msg.NoAckNeeded := false; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ProbeRequestStartTime := curCycle(); + out_msg.DemandRequest := true; + if (is_valid(cache_entry)) { + out_msg.Acks := cache_entry.Sharers.count(); + // Don't need an ack from the requestor! 
+ if (cache_entry.Sharers.isElement(in_msg.Requestor)) { + out_msg.Acks := out_msg.Acks - 1; + } + } else { + out_msg.Acks := 0; + } + } + } + } + + action(f_fwdReqToDirWithAckShared, "fas", desc="Forward CPU request to directory with ack request") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) { + out_msg.addr := in_msg.addr; // This is the block address. "address" is the region address + out_msg.Type := in_msg.OriginalType; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Dirty := in_msg.Dirty; + out_msg.Requestor := getCoreMachine(in_msg.Requestor,address); + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.Destination.add(map_Address_to_Directory(in_msg.addr)); + out_msg.Shared := in_msg.Shared; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.Private := in_msg.Private; + out_msg.NoAckNeeded := false; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ProbeRequestStartTime := curCycle(); + out_msg.DemandRequest := true; + out_msg.ForceShared := true; + if (is_valid(cache_entry)) { + out_msg.Acks := cache_entry.Sharers.count(); + // Don't need an ack from the requestor! 
+ if (cache_entry.Sharers.isElement(in_msg.Requestor)) { + out_msg.Acks := out_msg.Acks - 1; + } + } else { + out_msg.Acks := 0; + } + } + } + } + + action(a_allocateRegionEntry, "a", desc="Allocate a new entry") { + set_cache_entry(cacheMemory.allocate(getRegionBase(address), new Entry)); + peek(requestNetwork_in, CPURequestMsg) { + APPEND_TRANSITION_COMMENT(in_msg.Requestor); + } + } + + action(d_deallocateRegionEntry, "d", desc="Deallocate region entry") { + cacheMemory.deallocate(getRegionBase(address)); + unset_cache_entry(); + } + + action(ra_receiveAck, "ra", desc="Mark TBE entry as received this ack") { + //assert(tbe.ValidBlocks.at(getRegionOffset(address))); + DPRINTF(RubySlicc, "received ack for %s reg: %s\n", address, getRegionBase(address)); + tbe.NumValidBlocks := tbe.NumValidBlocks - 1; + assert(tbe.NumValidBlocks >= 0); + if (tbe.NumValidBlocks == 0) { + tbe.AllAcksReceived := true; + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.Type := TriggerType:AcksComplete; + out_msg.addr := address; + } + } + APPEND_TRANSITION_COMMENT(getRegionBase(address)); + APPEND_TRANSITION_COMMENT(" Acks left receive "); + APPEND_TRANSITION_COMMENT(tbe.NumValidBlocks); + } + + action(ca_checkAcks, "ca", desc="Check to see if we need more acks") { + if (tbe.NumValidBlocks == 0) { + tbe.AllAcksReceived := true; + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.Type := TriggerType:AcksComplete; + out_msg.addr := address; + } + } + } + + action(ti_triggerInv, "ti", desc="") { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.Type := TriggerType:InvRegion; + out_msg.addr := address; + } + } + + action(td_triggerDowngrade, "td", desc="") { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.Type := TriggerType:DowngradeRegion; + out_msg.addr := address; + } + } + + action(t_allocateTBE, "t", desc="allocate TBE Entry") { + check_allocate(TBEs); + TBEs.allocate(getRegionBase(address)); + set_tbe(getTBE(address)); + if (is_valid(cache_entry)) { + tbe.Owner := 
cache_entry.Owner; + tbe.Sharers := cache_entry.Sharers; + tbe.AllAcksReceived := true; // assume no acks are required + } + tbe.ProbeRequestTime := curCycle(); + peek(requestNetwork_in, CPURequestMsg) { + tbe.InitialRequestTime := in_msg.InitialRequestTime; + tbe.DemandAddress := in_msg.addr; + } + APPEND_TRANSITION_COMMENT(getRegionBase(address)); + APPEND_TRANSITION_COMMENT(" Acks left "); + APPEND_TRANSITION_COMMENT(tbe.NumValidBlocks); + APPEND_TRANSITION_COMMENT(" Owner, "); + APPEND_TRANSITION_COMMENT(tbe.Owner); + APPEND_TRANSITION_COMMENT(" sharers, "); + APPEND_TRANSITION_COMMENT(tbe.Sharers); + } + + action(ss_setSharers, "ss", desc="Add requestor to sharers") { + peek(requestNetwork_in, CPURequestMsg) { + cache_entry.Sharers.add(in_msg.Requestor); + APPEND_TRANSITION_COMMENT(cache_entry.Sharers); + } + } + + action(rs_removeSharer, "rs", desc="Remove requestor from sharers") { + peek(requestNetwork_in, CPURequestMsg) { + cache_entry.Sharers.remove(in_msg.Requestor); + APPEND_TRANSITION_COMMENT(" removing "); + APPEND_TRANSITION_COMMENT(in_msg.Requestor); + APPEND_TRANSITION_COMMENT(" sharers "); + APPEND_TRANSITION_COMMENT(cache_entry.Sharers); + } + } + + action(rsr_removeSharerResponse, "rsr", desc="Remove response sender from sharers") { + peek(responseNetwork_in, ResponseMsg) { + cache_entry.Sharers.remove(in_msg.Sender); + APPEND_TRANSITION_COMMENT(cache_entry.Sharers); + } + } + + action(cs_clearSharers, "cs", desc="Clear the sharers list") { + cache_entry.Sharers.clear(); + } + + action(so_setOwner, "so", desc="Set the owner to the requestor") { + peek(requestNetwork_in, CPURequestMsg) { + cache_entry.Owner := in_msg.Requestor; + APPEND_TRANSITION_COMMENT(" Owner now: "); + APPEND_TRANSITION_COMMENT(cache_entry.Owner); + } + } + + action(rr_removeRequestorFromTBE, "rr", desc="Remove requestor from TBE sharers") { + peek(requestNetwork_in, CPURequestMsg) { + tbe.Sharers.remove(in_msg.Requestor); + } + } + + action(ur_updateDirtyStatusOnRequest, "ur", 
desc="Update dirty status on demand request") { + peek(requestNetwork_in, CPURequestMsg) { + if (is_valid(cache_entry)) { + if ((in_msg.Type == CoherenceRequestType:SharedRequest) && + (cache_entry.Sharers.isElement(in_msg.Requestor) == false)) { + cache_entry.LastWriten := false; + if (isCpuMachine(in_msg.Requestor)) { + cache_entry.LastWritenByCpu := false; + } else { + cache_entry.LastWritenByGpu := false; + } + } else if ((in_msg.Type == CoherenceRequestType:PrivateRequest) || + (in_msg.Type == CoherenceRequestType:UpgradeRequest)) { + cache_entry.LastWriten := true; + if (isCpuMachine(in_msg.Requestor)) { + cache_entry.LastWritenByCpu := true; + } else { + cache_entry.LastWritenByGpu := true; + } + } + } + } + } + + action(ud_updateDirtyStatusWithWb, "ud", desc="Update dirty status on writeback") { + peek(requestNetwork_in, CPURequestMsg) { + if (is_valid(cache_entry) && in_msg.Dirty) { + cache_entry.LastWriten := true; + if (isCpuMachine(in_msg.Requestor)) { + cache_entry.LastWritenByCpu := true; + } else { + cache_entry.LastWritenByGpu := true; + } + } + } + } + + action(sns_setNumAcksSharers, "sns", desc="Set number of acks to one per shared region buffer") { + assert(is_valid(tbe)); + assert(is_valid(cache_entry)); + tbe.NumValidBlocks := tbe.Sharers.count(); + } + + action(sno_setNumAcksOne, "sno", desc="Set number of acks to exactly one") { + assert(is_valid(tbe)); + assert(is_valid(cache_entry)); + tbe.NumValidBlocks := 1; + } + + action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") { + TBEs.deallocate(getRegionBase(address)); + APPEND_TRANSITION_COMMENT(" reg: "); + APPEND_TRANSITION_COMMENT(getRegionBase(address)); + unset_tbe(); + } + + action(wb_sendWbNotice, "wb", desc="Send notice to cache that writeback is acknowledged") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(notifyNetwork_out, CPURequestMsg, 1) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceRequestType:WbNotify; + 
out_msg.Destination.add(in_msg.Requestor); + out_msg.Requestor := machineID; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + } + } + } + + action(wbn_sendWbNoticeNoAck, "wbn", desc="Send notice to cache that writeback is acknowledged (no ack needed)") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(notifyNetwork_out, CPURequestMsg, 1) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceRequestType:WbNotify; + out_msg.Destination.add(in_msg.Requestor); + out_msg.Requestor := machineID; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.NoAckNeeded := true; + } + } + } + + action(b_sendPrivateNotice, "b", desc="Send notice to private cache that it has private access") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(notifyNetwork_out, CPURequestMsg, 1) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceRequestType:PrivateNotify; + out_msg.Destination.add(in_msg.Requestor); + out_msg.Requestor := machineID; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + } + } + } + + action(bs_sendSharedNotice, "bs", desc="Send notice to private cache that it has shared access") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(notifyNetwork_out, CPURequestMsg, 1) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceRequestType:SharedNotify; + out_msg.Destination.add(in_msg.Requestor); + out_msg.Requestor := machineID; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + } + } + } + + action(c_sendSharedNoticeToOrigReq, "c", desc="Send notice to private cache that it has shared access") { + assert(is_valid(tbe)); + enqueue(notifyNetwork_out, CPURequestMsg, 1) { + out_msg.addr := getRegionBase(address); + out_msg.Type := 
CoherenceRequestType:SharedNotify; + out_msg.Destination.add(tbe.Owner); + out_msg.Requestor := machineID; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestTime; + out_msg.InitialRequestTime := tbe.InitialRequestTime; + APPEND_TRANSITION_COMMENT("dest: "); + APPEND_TRANSITION_COMMENT(out_msg.Destination); + } + } + + action(sp_sendPrivateNoticeToOrigReq, "sp", desc="Send notice to private cache that it has private access") { + assert(is_valid(tbe)); + enqueue(notifyNetwork_out, CPURequestMsg, 1) { + out_msg.addr := getRegionBase(address); + out_msg.Type := CoherenceRequestType:PrivateNotify; + out_msg.Destination.add(tbe.Owner); + out_msg.Requestor := machineID; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestTime; + out_msg.InitialRequestTime := tbe.InitialRequestTime; + APPEND_TRANSITION_COMMENT("dest: "); + APPEND_TRANSITION_COMMENT(out_msg.Destination); + } + } + + action(i_RegionInvNotify, "i", desc="Send notice to private cache that it no longer has private access") { + enqueue(probeNetwork_out, NBProbeRequestMsg, 1) { + out_msg.addr := address; + out_msg.DemandAddress := tbe.DemandAddress; + //out_msg.Requestor := tbe.Requestor; + out_msg.Requestor := machineID; + out_msg.Type := ProbeRequestType:PrbInv; + //Fix me: assert(tbe.Sharers.count() > 0); + out_msg.DemandRequest := true; + out_msg.Destination := tbe.Sharers; + out_msg.MessageSize := MessageSizeType:Request_Control; + APPEND_TRANSITION_COMMENT("dest: "); + APPEND_TRANSITION_COMMENT(out_msg.Destination); + } + } + + action(i0_RegionInvNotifyDemand0, "i0", desc="Send notice to private cache that it no longer has private access") { + enqueue(probeNetwork_out, NBProbeRequestMsg, 1) { + out_msg.addr := address; + // Demand address should default to 0 -> out_msg.DemandAddress := 0; + out_msg.Requestor := machineID; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.Destination := 
tbe.Sharers; + out_msg.MessageSize := MessageSizeType:Request_Control; + APPEND_TRANSITION_COMMENT("dest: "); + APPEND_TRANSITION_COMMENT(out_msg.Destination); + } + } + + action(rd_RegionDowngrade, "rd", desc="Send notice to private cache that it only has shared access") { + enqueue(probeNetwork_out, NBProbeRequestMsg, 1) { + out_msg.addr := address; + out_msg.DemandAddress := tbe.DemandAddress; + out_msg.Requestor := machineID; + out_msg.Type := ProbeRequestType:PrbDowngrade; + out_msg.DemandRequest := true; + out_msg.Destination := tbe.Sharers; + out_msg.MessageSize := MessageSizeType:Request_Control; + APPEND_TRANSITION_COMMENT("dest: "); + APPEND_TRANSITION_COMMENT(out_msg.Destination); + } + } + + action(p_popRequestQueue, "p", desc="Pop the request queue") { + requestNetwork_in.dequeue(clockEdge()); + } + + action(pt_popTriggerQueue, "pt", desc="Pop the trigger queue") { + triggerQueue_in.dequeue(clockEdge()); + } + + action(pr_popResponseQueue, "pr", desc="Pop the response queue") { + responseNetwork_in.dequeue(clockEdge()); + } + + action(s_stallAndWaitRequest, "s", desc="Stall and wait on the region address") { + Addr regAddr := getRegionBase(address); + stall_and_wait(requestNetwork_in, regAddr); + } + + action(w_wakeUpRegionDependents, "w", desc="Wake up any requests waiting for this region") { + wakeUpBuffers(getRegionBase(address)); + } + + action(wa_wakeUpAllDependents, "wa", desc="Wake up all stalled requests, regardless of region") { + wakeUpAllBuffers(); + } + + action(zz_recycleRequestQueue, "\z", desc="Recycle the request queue") { + requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + action(z_stall, "z", desc="stall request queue") { + // fake state + } + + action(mru_setMRU, "mru", desc="set MRU") { + cacheMemory.setMRU(address); + } + + // Transitions + + transition({NP_P, P_P, NP_S, S_S, S_P, P_S, P_NP, S_AP, P_AS, P_AP, SP_NP_W, S_W, P_AP_W, P_AS_W, S_AP_W}, {PrivateRequest, SharedRequest, UpgradeRequest, SendInv, SendUpgrade, 
SendDowngrade, CleanWbRequest, CleanWbRequest_LastSharer, StaleCleanWbRequest}) { + s_stallAndWaitRequest + } + + transition({NP_P, P_P, NP_S, S_S, S_P, S_W, P_S, P_NP, S_AP, P_AS, P_AP, P_AP_W, P_AS_W, S_AP_W}, Evict) { + zz_recycleRequestQueue; + } + + transition(NP, {PrivateRequest, SendUpgrade}, NP_P) {TagArrayRead, TagArrayWrite} { + a_allocateRegionEntry; + ur_updateDirtyStatusOnRequest; + f_fwdReqToDir; + b_sendPrivateNotice; + so_setOwner; + ss_setSharers; + t_allocateTBE; + p_popRequestQueue; + } + + transition(P, {PrivateRequest, UpgradeRequest}, P_P) {TagArrayRead} { + mru_setMRU; + ur_updateDirtyStatusOnRequest; + f_fwdReqToDir; + b_sendPrivateNotice; + t_allocateTBE; + p_popRequestQueue; + } + + transition({NP_P, P_P}, CPUPrivateAck, P) { + dt_deallocateTBE; + w_wakeUpRegionDependents; + pr_popResponseQueue; + } + + transition({NP, P, S}, StaleCleanWbRequest) {TagArrayRead, TagArrayWrite} { + wbn_sendWbNoticeNoAck; + ud_updateDirtyStatusWithWb; + p_popRequestQueue; + } + + transition(NP, SharedRequest, NP_S) {TagArrayRead, TagArrayWrite} { + a_allocateRegionEntry; + ur_updateDirtyStatusOnRequest; + f_fwdReqToDirShared; + bs_sendSharedNotice; + so_setOwner; + ss_setSharers; + t_allocateTBE; + p_popRequestQueue; + } + + // Could probably do this in parallel with other shared requests + transition(S, SharedRequest, S_S) {TagArrayRead, TagArrayWrite} { + mru_setMRU; + ur_updateDirtyStatusOnRequest; + f_fwdReqToDirShared; + bs_sendSharedNotice; + ss_setSharers; + t_allocateTBE; + p_popRequestQueue; + } + + transition({P, S}, CleanWbRequest_LastSharer, SP_NP_W) {TagArrayRead, TagArrayWrite} { + ud_updateDirtyStatusWithWb; + wb_sendWbNotice; + rs_removeSharer; + t_allocateTBE; + d_deallocateRegionEntry; + p_popRequestQueue; + } + + transition(S, CleanWbRequest, S_W) {TagArrayRead, TagArrayWrite} { + ud_updateDirtyStatusWithWb; + wb_sendWbNotice; + rs_removeSharer; + t_allocateTBE; + p_popRequestQueue; + } + + transition(SP_NP_W, WritebackAck, NP) { + 
dt_deallocateTBE; + w_wakeUpRegionDependents; + pr_popResponseQueue; + } + + transition(S_W, WritebackAck, S) { + dt_deallocateTBE; + w_wakeUpRegionDependents; + pr_popResponseQueue; + } + + transition({NP_S, S_S}, CPUPrivateAck, S) { + dt_deallocateTBE; + w_wakeUpRegionDependents; + pr_popResponseQueue; + } + + transition(S, UpgradeRequest, S_P) {TagArrayRead, TagArrayWrite} { + mru_setMRU; + ur_updateDirtyStatusOnRequest; + f_fwdReqToDir; + b_sendPrivateNotice; + so_setOwner; + t_allocateTBE; + p_popRequestQueue; + } + + transition(S_P, CPUPrivateAck, P) { + dt_deallocateTBE; + w_wakeUpRegionDependents; + pr_popResponseQueue; + } + + transition(P, SendInv, P_AP_W) {TagArrayRead, TagArrayWrite} { + mru_setMRU; + ur_updateDirtyStatusOnRequest; + f_fwdReqToDirWithAck; + so_setOwner; + t_allocateTBE; + rr_removeRequestorFromTBE; + sns_setNumAcksSharers; + cs_clearSharers; + ss_setSharers; + //i_RegionInvNotify; + p_popRequestQueue; + } + + transition({P_AP_W, S_AP_W}, DirReadyAck) { + ti_triggerInv; + pr_popResponseQueue; + } + + transition(P_AS_W, DirReadyAck) { + td_triggerDowngrade; + pr_popResponseQueue; + } + + transition(P_AS_W, TriggerDowngrade, P_AS) { + rd_RegionDowngrade; + pt_popTriggerQueue; + } + + transition(P_AP_W, TriggerInv, P_AP) { + i_RegionInvNotify; + pt_popTriggerQueue; + } + + transition(S_AP_W, TriggerInv, S_AP) { + i_RegionInvNotify; + pt_popTriggerQueue; + } + + transition(P, SendUpgrade, P_AP_W) {TagArrayRead, TagArrayWrite} { + mru_setMRU; + ur_updateDirtyStatusOnRequest; + f_fwdReqToDirWithAck; + so_setOwner; + t_allocateTBE; + rr_removeRequestorFromTBE; + sns_setNumAcksSharers; + cs_clearSharers; + ss_setSharers; + p_popRequestQueue; + } + + transition(P, Evict, P_NP) {TagArrayRead, TagArrayWrite} { + t_allocateTBE; + sns_setNumAcksSharers; + i0_RegionInvNotifyDemand0; + d_deallocateRegionEntry; + } + + transition(S, SendInv, P_AP_W) {TagArrayRead, TagArrayWrite} { + mru_setMRU; + ur_updateDirtyStatusOnRequest; + f_fwdReqToDirWithAck; + 
so_setOwner; + t_allocateTBE; + rr_removeRequestorFromTBE; + sns_setNumAcksSharers; + cs_clearSharers; + ss_setSharers; + p_popRequestQueue; + } + + transition(S, Evict, P_NP) {TagArrayRead, TagArrayWrite} { + t_allocateTBE; + sns_setNumAcksSharers; + i0_RegionInvNotifyDemand0; + d_deallocateRegionEntry; + } + + transition(P_NP, LastAck, NP) { + dt_deallocateTBE; + wa_wakeUpAllDependents; + pt_popTriggerQueue; + } + + transition(S, SendUpgrade, S_AP_W) {TagArrayRead, TagArrayWrite} { + mru_setMRU; + ur_updateDirtyStatusOnRequest; + f_fwdReqToDirWithAck; + so_setOwner; + t_allocateTBE; + rr_removeRequestorFromTBE; + sns_setNumAcksSharers; + cs_clearSharers; + ss_setSharers; + p_popRequestQueue; + } + + transition(S_AP, LastAck, S_P) { + sp_sendPrivateNoticeToOrigReq; + pt_popTriggerQueue; + } + + transition(P_AP, LastAck, P_P) { + sp_sendPrivateNoticeToOrigReq; + pt_popTriggerQueue; + } + + transition(P, SendDowngrade, P_AS_W) {TagArrayRead, TagArrayWrite} { + mru_setMRU; + ur_updateDirtyStatusOnRequest; + f_fwdReqToDirWithAckShared; + so_setOwner; + t_allocateTBE; + sns_setNumAcksSharers; + ss_setSharers; //why do we set the sharers before sending the downgrade? Are we sending a downgrade to the requestor? 
+ p_popRequestQueue; + } + + transition(P_AS, LastAck, P_S) { + c_sendSharedNoticeToOrigReq; + pt_popTriggerQueue; + } + + transition(P_S, CPUPrivateAck, S) { + dt_deallocateTBE; + w_wakeUpRegionDependents; + pr_popResponseQueue; + } + + transition({P_NP, P_AS, S_AP, P_AP}, InvAckCore) {} { + ra_receiveAck; + pr_popResponseQueue; + } + + transition({P_NP, S_AP, P_AP}, InvAckCoreNoShare) {} { + ra_receiveAck; + pr_popResponseQueue; + } + + transition(P_AS, InvAckCoreNoShare) {} { + ra_receiveAck; + rsr_removeSharerResponse; + pr_popResponseQueue; + } + +} + + diff --git a/src/mem/protocol/MOESI_AMD_Base-dir.sm b/src/mem/protocol/MOESI_AMD_Base-dir.sm new file mode 100644 index 000000000..52cefda66 --- /dev/null +++ b/src/mem/protocol/MOESI_AMD_Base-dir.sm @@ -0,0 +1,1137 @@ +/* + * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Lisa Hsu + */ + +machine(MachineType:Directory, "AMD Baseline protocol") +: DirectoryMemory * directory; + CacheMemory * L3CacheMemory; + Cycles response_latency := 5; + Cycles l3_hit_latency := 50; + bool noTCCdir := "False"; + bool CPUonly := "False"; + int TCC_select_num_bits; + bool useL3OnWT := "False"; + Cycles to_memory_controller_latency := 1; + + // From the Cores + MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request"; + MessageBuffer * responseFromCores, network="From", virtual_network="2", vnet_type="response"; + MessageBuffer * unblockFromCores, network="From", virtual_network="4", vnet_type="unblock"; + + MessageBuffer * probeToCore, network="To", virtual_network="0", vnet_type="request"; + MessageBuffer * responseToCore, network="To", virtual_network="2", vnet_type="response"; + + MessageBuffer * triggerQueue; + MessageBuffer * L3triggerQueue; + MessageBuffer * responseFromMemory; +{ + // STATES + state_declaration(State, desc="Directory states", default="Directory_State_U") { + U, AccessPermission:Backing_Store, desc="unblocked"; + BL, AccessPermission:Busy, desc="got L3 WB request"; + // BL is Busy because it's possible for the data only to be in the network + // in the WB, L3 has sent it and gone on with its business in possibly I + // state. 
+ BS_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; + BM_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; + B_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; + BP, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory"; + BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; + BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; + B_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; + BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + B, AccessPermission:Backing_Store, desc="sent response, Blocked til ack"; + } + + // Events + enumeration(Event, desc="Directory events") { + // CPU requests + RdBlkS, desc="..."; + RdBlkM, desc="..."; + RdBlk, desc="..."; + CtoD, desc="..."; + WriteThrough, desc="WriteThrough Message"; + Atomic, desc="Atomic Message"; + + // writebacks + VicDirty, desc="..."; + VicClean, desc="..."; + CPUData, desc="WB data from CPU"; + StaleWB, desc="Notification that WB has been superceded by a probe"; + + // probe responses + CPUPrbResp, desc="Probe Response Msg"; + + ProbeAcksComplete, desc="Probe Acks Complete"; + + L3Hit, desc="Hit in L3 return data to core"; + + // Memory Controller + MemData, desc="Fetched data from memory arrives"; + WBAck, desc="Writeback Ack from memory arrives"; + + CoreUnblock, desc="Core received data, unblock"; + UnblockWriteThrough, desc="Unblock because of writethrough request finishing"; + + StaleVicDirty, desc="Core invalidated before VicDirty processed"; + } + + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { + L3DataArrayRead, desc="Read the data array"; 
+ L3DataArrayWrite, desc="Write the data array"; + L3TagArrayRead, desc="Read the tag array"; + L3TagArrayWrite, desc="Write the tag array"; + } + + // TYPES + + // DirectoryEntry + structure(Entry, desc="...", interface="AbstractEntry") { + State DirectoryState, desc="Directory state"; + DataBlock DataBlk, desc="data for the block"; + NetDest VicDirtyIgnore, desc="VicDirty coming from whom to ignore"; + } + + structure(CacheEntry, desc="...", interface="AbstractCacheEntry") { + DataBlock DataBlk, desc="data for the block"; + MachineID LastSender, desc="Mach which this block came from"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block"; + bool Dirty, desc="Is the data dirty?"; + int NumPendingAcks, desc="num acks expected"; + MachineID OriginalRequestor, desc="Original Requestor"; + MachineID WTRequestor, desc="WT Requestor"; + bool Cached, desc="data hit in Cache"; + bool MemData, desc="Got MemData?",default="false"; + bool wtData, desc="Got write through data?",default="false"; + bool atomicData, desc="Got Atomic op?",default="false"; + Cycles InitialRequestTime, desc="..."; + Cycles ForwardRequestTime, desc="..."; + Cycles ProbeRequestStartTime, desc="..."; + MachineID LastSender, desc="Mach which this block came from"; + bool L3Hit, default="false", desc="Was this an L3 hit?"; + uint64_t probe_id, desc="probe id for lifetime profiling"; + WriteMask writeMask, desc="outstanding write through mask"; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs"; + + int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + + Tick clockEdge(); + Tick cyclesToTicks(Cycles c); + + void set_tbe(TBE a); + void unset_tbe(); + void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + Cycles curCycle(); + + Entry 
getDirectoryEntry(Addr addr), return_by_pointer="yes" { + Entry dir_entry := static_cast(Entry, "pointer", directory.lookup(addr)); + + if (is_valid(dir_entry)) { + return dir_entry; + } + + dir_entry := static_cast(Entry, "pointer", + directory.allocate(addr, new Entry)); + return dir_entry; + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + TBE tbe := TBEs.lookup(addr); + if (is_valid(tbe) && tbe.MemData) { + DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe); + return tbe.DataBlk; + } + DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr)); + return getDirectoryEntry(addr).DataBlk; + } + + State getState(TBE tbe, CacheEntry entry, Addr addr) { + return getDirectoryEntry(addr).DirectoryState; + } + + void setState(TBE tbe, CacheEntry entry, Addr addr, State state) { + getDirectoryEntry(addr).DirectoryState := state; + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + AccessPermission getAccessPermission(Addr addr) { + // For this Directory, all permissions are just tracked in Directory, since + // it's not possible to have something in TBE but not Dir, just keep track + // of state all in one place. 
+ if (directory.isPresent(addr)) { + return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState); + } + + return AccessPermission:NotPresent; + } + + void setAccessPermission(CacheEntry entry, Addr addr, State state) { + getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state)); + } + + void recordRequestType(RequestType request_type, Addr addr) { + if (request_type == RequestType:L3DataArrayRead) { + L3CacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:L3DataArrayWrite) { + L3CacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:L3TagArrayRead) { + L3CacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:L3TagArrayWrite) { + L3CacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:L3DataArrayRead) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L3DataArrayWrite) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L3TagArrayRead) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:L3TagArrayWrite) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } + } + + // ** OUT_PORTS ** + out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore); + out_port(responseNetwork_out, ResponseMsg, responseToCore); + + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + out_port(L3TriggerQueue_out, TriggerMsg, L3triggerQueue); + + // ** IN_PORTS ** + + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, 
triggerQueue, rank=5) { + if (triggerQueue_in.isReady(clockEdge())) { + peek(triggerQueue_in, TriggerMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == TriggerType:AcksComplete) { + trigger(Event:ProbeAcksComplete, in_msg.addr, entry, tbe); + }else if (in_msg.Type == TriggerType:UnblockWriteThrough) { + trigger(Event:UnblockWriteThrough, in_msg.addr, entry, tbe); + } else { + error("Unknown trigger msg"); + } + } + } + } + + in_port(L3TriggerQueue_in, TriggerMsg, L3triggerQueue, rank=4) { + if (L3TriggerQueue_in.isReady(clockEdge())) { + peek(L3TriggerQueue_in, TriggerMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == TriggerType:L3Hit) { + trigger(Event:L3Hit, in_msg.addr, entry, tbe); + } else { + error("Unknown trigger msg"); + } + } + } + } + + // Unblock Network + in_port(unblockNetwork_in, UnblockMsg, unblockFromCores, rank=3) { + if (unblockNetwork_in.isReady(clockEdge())) { + peek(unblockNetwork_in, UnblockMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + trigger(Event:CoreUnblock, in_msg.addr, entry, tbe); + } + } + } + + // Core response network + in_port(responseNetwork_in, ResponseMsg, responseFromCores, rank=2) { + if (responseNetwork_in.isReady(clockEdge())) { + peek(responseNetwork_in, ResponseMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == CoherenceResponseType:CPUPrbResp) { + trigger(Event:CPUPrbResp, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceResponseType:CPUData) { + trigger(Event:CPUData, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceResponseType:StaleNotif) { + trigger(Event:StaleWB, 
in_msg.addr, entry, tbe); + } else { + error("Unexpected response type"); + } + } + } + } + + // off-chip memory request/response is done + in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=1) { + if (memQueue_in.isReady(clockEdge())) { + peek(memQueue_in, MemoryMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == MemoryRequestType:MEMORY_READ) { + trigger(Event:MemData, in_msg.addr, entry, tbe); + DPRINTF(RubySlicc, "%s\n", in_msg); + } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) { + trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them. + } else { + DPRINTF(RubySlicc, "%s\n", in_msg.Type); + error("Invalid message"); + } + } + } + } + + in_port(requestNetwork_in, CPURequestMsg, requestFromCores, rank=0) { + if (requestNetwork_in.isReady(clockEdge())) { + peek(requestNetwork_in, CPURequestMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == CoherenceRequestType:RdBlk) { + trigger(Event:RdBlk, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkS) { + trigger(Event:RdBlkS, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { + trigger(Event:RdBlkM, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:WriteThrough) { + trigger(Event:WriteThrough, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:Atomic) { + trigger(Event:Atomic, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:VicDirty) { + if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { + DPRINTF(RubySlicc, "Dropping VicDirty for address %s\n", in_msg.addr); + trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe); + } else { + DPRINTF(RubySlicc, "Got VicDirty from %s on %s\n", in_msg.Requestor, 
in_msg.addr); + trigger(Event:VicDirty, in_msg.addr, entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:VicClean) { + if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { + DPRINTF(RubySlicc, "Dropping VicClean for address %s\n", in_msg.addr); + trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe); + } else { + DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr); + trigger(Event:VicClean, in_msg.addr, entry, tbe); + } + } else { + error("Bad request message type"); + } + } + } + } + + // Actions + action(s_sendResponseS, "s", desc="send Shared response") { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := false; + out_msg.State := CoherenceState:Shared; + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := tbe.ForwardRequestTime; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + out_msg.L3Hit := tbe.L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(es_sendResponseES, "es", desc="send Exclusive or Shared response") { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := tbe.Dirty; + if (tbe.Cached) { + out_msg.State := 
CoherenceState:Shared; + } else { + out_msg.State := CoherenceState:Exclusive; + } + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := tbe.ForwardRequestTime; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + out_msg.L3Hit := tbe.L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(m_sendResponseM, "m", desc="send Modified response") { + if (tbe.wtData) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:UnblockWriteThrough; + } + }else{ + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := tbe.Dirty; + out_msg.State := CoherenceState:Modified; + out_msg.CtoD := false; + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := tbe.ForwardRequestTime; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + if(tbe.atomicData){ + out_msg.WTRequestor := tbe.WTRequestor; + } + out_msg.L3Hit := tbe.L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + if (tbe.atomicData) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:UnblockWriteThrough; + } + } + } + } + + action(c_sendResponseCtoD, "c", desc="send CtoD Ack") { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + out_msg.Sender := machineID; + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.MessageSize := MessageSizeType:Response_Control; + 
out_msg.Dirty := false; + out_msg.State := CoherenceState:Modified; + out_msg.CtoD := true; + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(w_sendResponseWBAck, "w", desc="send WB Ack") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, 1) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysWBAck; + out_msg.Destination.add(in_msg.Requestor); + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := curCycle(); + } + } + } + + action(l_queueMemWBReq, "lq", desc="Write WB data to memory") { + peek(responseNetwork_in, ResponseMsg) { + queueMemoryWrite(machineID, address, to_memory_controller_latency, + in_msg.DataBlk); + } + } + + action(l_queueMemRdReq, "lr", desc="Read data from memory") { + peek(requestNetwork_in, CPURequestMsg) { + if (L3CacheMemory.isTagPresent(address)) { + enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + if (tbe.Dirty == false) { + tbe.DataBlk := entry.DataBlk; + } + tbe.LastSender := entry.LastSender; + tbe.L3Hit := true; + tbe.MemData := true; + L3CacheMemory.deallocate(address); + } else { + queueMemoryRead(machineID, address, to_memory_controller_latency); + } + } + } + + action(dc_probeInvCoreData, "dc", desc="probe inv cores, return data") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := 
ProbeRequestType:PrbInv; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + + // add relevant TCC node to list. This replaces all TCPs and SQCs + if (((in_msg.Type == CoherenceRequestType:WriteThrough || + in_msg.Type == CoherenceRequestType:Atomic) && + in_msg.NoWriteConflict) || + CPUonly) { + } else if (noTCCdir) { + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + } else { + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + } + out_msg.Destination.remove(in_msg.Requestor); + tbe.NumPendingAcks := out_msg.Destination.count(); + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + DPRINTF(RubySlicc, "%s\n", out_msg); + APPEND_TRANSITION_COMMENT(" dc: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + + action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") { + peek(requestNetwork_in, CPURequestMsg) { // not the right network? + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbDowngrade; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + // add relevant TCC node to the list. 
This replaces all TCPs and SQCs + if (noTCCdir || CPUonly) { + //Don't need to notify TCC about reads + } else { + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + tbe.NumPendingAcks := tbe.NumPendingAcks + 1; + } + if (noTCCdir && !CPUonly) { + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + } + out_msg.Destination.remove(in_msg.Requestor); + tbe.NumPendingAcks := out_msg.Destination.count(); + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + DPRINTF(RubySlicc, "%s\n", (out_msg)); + APPEND_TRANSITION_COMMENT(" sc: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + + action(ic_probeInvCore, "ic", desc="probe invalidate core, no return data needed") { + peek(requestNetwork_in, CPURequestMsg) { // not the right network? + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := false; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + + // add relevant TCC node to the list. 
This replaces all TCPs and SQCs + if (noTCCdir && !CPUonly) { + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + } else { + if (!noTCCdir) { + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + TCC_select_low_bit, + TCC_select_num_bits)); + } + } + out_msg.Destination.remove(in_msg.Requestor); + tbe.NumPendingAcks := out_msg.Destination.count(); + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + APPEND_TRANSITION_COMMENT(" ic: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + DPRINTF(RubySlicc, "%s\n", out_msg); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + + action(d_writeDataToMemory, "d", desc="Write data to memory") { + peek(responseNetwork_in, ResponseMsg) { + getDirectoryEntry(address).DataBlk := in_msg.DataBlk; + if (tbe.Dirty == false) { + // have to update the TBE, too, because of how this + // directory deals with functional writes + tbe.DataBlk := in_msg.DataBlk; + } + } + } + + action(t_allocateTBE, "t", desc="allocate TBE Entry") { + check_allocate(TBEs); + peek(requestNetwork_in, CPURequestMsg) { + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + tbe.writeMask.clear(); + tbe.writeMask.orMask(in_msg.writeMask); + tbe.wtData := true; + tbe.WTRequestor := in_msg.WTRequestor; + tbe.LastSender := in_msg.Requestor; + } + if (in_msg.Type == CoherenceRequestType:Atomic) { + tbe.writeMask.clear(); + tbe.writeMask.orMask(in_msg.writeMask); + tbe.atomicData := true; + tbe.WTRequestor := in_msg.WTRequestor; + tbe.LastSender := in_msg.Requestor; + } + tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs + tbe.Dirty := false; + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + tbe.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask); + tbe.Dirty 
:= true; + } + tbe.OriginalRequestor := in_msg.Requestor; + tbe.NumPendingAcks := 0; + tbe.Cached := in_msg.ForceShared; + tbe.InitialRequestTime := in_msg.InitialRequestTime; + } + } + + action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") { + if (tbe.Dirty == false) { + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + } + TBEs.deallocate(address); + unset_tbe(); + } + + action(wd_writeBackData, "wd", desc="Write back data if needed") { + if (tbe.wtData) { + getDirectoryEntry(address).DataBlk.copyPartial(tbe.DataBlk, tbe.writeMask); + } else if (tbe.atomicData) { + tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask); + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + } else if (tbe.Dirty == false) { + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + } + } + + action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") { + peek(memQueue_in, MemoryMsg) { + if (tbe.wtData == true) { + // do nothing + } else if (tbe.Dirty == false) { + tbe.DataBlk := getDirectoryEntry(address).DataBlk; + } + tbe.MemData := true; + } + } + + action(y_writeProbeDataToTBE, "y", desc="write Probe Data to TBE") { + peek(responseNetwork_in, ResponseMsg) { + if (in_msg.Dirty) { + if (tbe.wtData) { + DataBlock tmp := in_msg.DataBlk; + tmp.copyPartial(tbe.DataBlk,tbe.writeMask); + tbe.DataBlk := tmp; + tbe.writeMask.fillMask(); + } else if (tbe.Dirty) { + if(tbe.atomicData == false && tbe.wtData == false) { + DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender); + assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data + } + } else { + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty; + tbe.LastSender := in_msg.Sender; + } + } + if (in_msg.Hit) { + tbe.Cached := true; + } + } + } + + action(mwc_markSinkWriteCancel, "mwc", desc="Mark to sink impending VicDirty") { + peek(responseNetwork_in, ResponseMsg) { + getDirectoryEntry(address).VicDirtyIgnore.add(in_msg.Sender); + APPEND_TRANSITION_COMMENT(" 
setting bit to sink VicDirty "); + } + } + + action(x_decrementAcks, "x", desc="decrement Acks pending") { + tbe.NumPendingAcks := tbe.NumPendingAcks - 1; + APPEND_TRANSITION_COMMENT(" Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + } + + action(o_checkForCompletion, "o", desc="check for ack completion") { + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + APPEND_TRANSITION_COMMENT(" Check: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + } + + action(rv_removeVicDirtyIgnore, "rv", desc="Remove ignored core") { + peek(requestNetwork_in, CPURequestMsg) { + getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor); + } + } + + action(al_allocateL3Block, "al", desc="allocate the L3 block on WB") { + peek(responseNetwork_in, ResponseMsg) { + if (L3CacheMemory.isTagPresent(address)) { + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) "); + entry.DataBlk := in_msg.DataBlk; + entry.LastSender := in_msg.Sender; + } else { + if (L3CacheMemory.cacheAvail(address) == false) { + Addr victim := L3CacheMemory.cacheProbe(address); + CacheEntry victim_entry := static_cast(CacheEntry, "pointer", + L3CacheMemory.lookup(victim)); + queueMemoryWrite(machineID, victim, to_memory_controller_latency, + victim_entry.DataBlk); + L3CacheMemory.deallocate(victim); + } + assert(L3CacheMemory.cacheAvail(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 "); + entry.DataBlk := in_msg.DataBlk; + + entry.LastSender := in_msg.Sender; + } + } + } + + action(alwt_allocateL3BlockOnWT, "alwt", desc="allocate the L3 block on WT") { + if ((tbe.wtData || tbe.atomicData) && useL3OnWT) { + if (L3CacheMemory.isTagPresent(address)) { + 
CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) "); + entry.DataBlk := tbe.DataBlk; + entry.LastSender := tbe.LastSender; + } else { + if (L3CacheMemory.cacheAvail(address) == false) { + Addr victim := L3CacheMemory.cacheProbe(address); + CacheEntry victim_entry := static_cast(CacheEntry, "pointer", + L3CacheMemory.lookup(victim)); + queueMemoryWrite(machineID, victim, to_memory_controller_latency, + victim_entry.DataBlk); + L3CacheMemory.deallocate(victim); + } + assert(L3CacheMemory.cacheAvail(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 "); + entry.DataBlk := tbe.DataBlk; + entry.LastSender := tbe.LastSender; + } + } + } + + action(sf_setForwardReqTime, "sf", desc="...") { + tbe.ForwardRequestTime := curCycle(); + } + + action(dl_deallocateL3, "dl", desc="deallocate the L3 block") { + L3CacheMemory.deallocate(address); + } + + action(p_popRequestQueue, "p", desc="pop request queue") { + requestNetwork_in.dequeue(clockEdge()); + } + + action(pr_popResponseQueue, "pr", desc="pop response queue") { + responseNetwork_in.dequeue(clockEdge()); + } + + action(pm_popMemQueue, "pm", desc="pop mem queue") { + memQueue_in.dequeue(clockEdge()); + } + + action(pt_popTriggerQueue, "pt", desc="pop trigger queue") { + triggerQueue_in.dequeue(clockEdge()); + } + + action(ptl_popTriggerQueue, "ptl", desc="pop L3 trigger queue") { + L3TriggerQueue_in.dequeue(clockEdge()); + } + + action(pu_popUnblockQueue, "pu", desc="pop unblock queue") { + unblockNetwork_in.dequeue(clockEdge()); + } + + action(zz_recycleRequestQueue, "zz", desc="recycle request queue") { + requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + action(yy_recycleResponseQueue, "yy", desc="recycle response queue") { + responseNetwork_in.recycle(clockEdge(), 
cyclesToTicks(recycle_latency)); + } + + action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") { + stall_and_wait(requestNetwork_in, address); + } + + action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") { + wakeUpBuffers(address); + } + + action(wa_wakeUpAllDependents, "waa", desc="Wake up any requests waiting for this region") { + wakeUpAllBuffers(); + } + + action(z_stall, "z", desc="...") { + } + + // TRANSITIONS + transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) { + st_stallAndWaitRequest; + } + + // It may be possible to save multiple invalidations here! + transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {Atomic, WriteThrough}) { + st_stallAndWaitRequest; + } + + + // transitions from U + transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead} { + t_allocateTBE; + l_queueMemRdReq; + sc_probeShrCoreData; + p_popRequestQueue; + } + + transition(U, WriteThrough, BM_PM) {L3TagArrayRead, L3TagArrayWrite} { + t_allocateTBE; + w_sendResponseWBAck; + l_queueMemRdReq; + dc_probeInvCoreData; + p_popRequestQueue; + } + + transition(U, Atomic, BM_PM) {L3TagArrayRead, L3TagArrayWrite} { + t_allocateTBE; + l_queueMemRdReq; + dc_probeInvCoreData; + p_popRequestQueue; + } + + transition(U, {RdBlkM}, BM_PM) {L3TagArrayRead} { + t_allocateTBE; + l_queueMemRdReq; + dc_probeInvCoreData; + p_popRequestQueue; + } + + transition(U, RdBlk, B_PM) {L3TagArrayRead}{ + t_allocateTBE; + l_queueMemRdReq; + sc_probeShrCoreData; + p_popRequestQueue; + } + + transition(U, CtoD, BP) {L3TagArrayRead} { + t_allocateTBE; + ic_probeInvCore; + p_popRequestQueue; + } + + transition(U, VicDirty, BL) {L3TagArrayRead} { + t_allocateTBE; + w_sendResponseWBAck; + p_popRequestQueue; + } + + transition(U, VicClean, BL) {L3TagArrayRead} { + t_allocateTBE; + w_sendResponseWBAck; + p_popRequestQueue; + } + + transition(BL, {VicDirty, VicClean}) { + 
zz_recycleRequestQueue; + } + + transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} { + d_writeDataToMemory; + al_allocateL3Block; + wa_wakeUpDependents; + dt_deallocateTBE; + pr_popResponseQueue; + } + + transition(BL, StaleWB, U) {L3TagArrayWrite} { + dt_deallocateTBE; + wa_wakeUpAllDependents; + pr_popResponseQueue; + } + + transition({B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) { + z_stall; + } + + transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, WBAck) { + pm_popMemQueue; + } + + transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, StaleVicDirty) { + rv_removeVicDirtyIgnore; + w_sendResponseWBAck; + p_popRequestQueue; + } + + transition({B}, CoreUnblock, U) { + wa_wakeUpDependents; + pu_popUnblockQueue; + } + + transition(B, UnblockWriteThrough, U) { + wa_wakeUpDependents; + pt_popTriggerQueue; + } + + transition(BS_PM, MemData, BS_Pm) {} { + mt_writeMemDataToTBE; + pm_popMemQueue; + } + + transition(BM_PM, MemData, BM_Pm){} { + mt_writeMemDataToTBE; + pm_popMemQueue; + } + + transition(B_PM, MemData, B_Pm){} { + mt_writeMemDataToTBE; + pm_popMemQueue; + } + + transition(BS_PM, L3Hit, BS_Pm) {} { + ptl_popTriggerQueue; + } + + transition(BM_PM, L3Hit, BM_Pm) {} { + ptl_popTriggerQueue; + } + + transition(B_PM, L3Hit, B_Pm) {} { + ptl_popTriggerQueue; + } + + transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { + mt_writeMemDataToTBE; + s_sendResponseS; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pm_popMemQueue; + } + + transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { + mt_writeMemDataToTBE; + m_sendResponseM; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pm_popMemQueue; + } + + transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { + mt_writeMemDataToTBE; + es_sendResponseES; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + 
pm_popMemQueue; + } + + transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} { + s_sendResponseS; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + ptl_popTriggerQueue; + } + + transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} { + m_sendResponseM; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + ptl_popTriggerQueue; + } + + transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} { + es_sendResponseES; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + ptl_popTriggerQueue; + } + + transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BP}, CPUPrbResp) { + y_writeProbeDataToTBE; + x_decrementAcks; + o_checkForCompletion; + pr_popResponseQueue; + } + + transition(BS_PM, ProbeAcksComplete, BS_M) {} { + sf_setForwardReqTime; + pt_popTriggerQueue; + } + + transition(BM_PM, ProbeAcksComplete, BM_M) {} { + sf_setForwardReqTime; + pt_popTriggerQueue; + } + + transition(B_PM, ProbeAcksComplete, B_M){} { + sf_setForwardReqTime; + pt_popTriggerQueue; + } + + transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { + sf_setForwardReqTime; + s_sendResponseS; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { + sf_setForwardReqTime; + m_sendResponseM; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { + sf_setForwardReqTime; + es_sendResponseES; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pt_popTriggerQueue; + } + + // CtoD completion: same write-back/L3-allocate/deallocate tail as the other + // ProbeAcksComplete -> B transitions, so it declares the same L3 array writes. + // NOTE(review): was {L3TagArrayWrite, L3TagArrayWrite}; confirm the intended pair. + transition(BP, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { + sf_setForwardReqTime; + c_sendResponseCtoD; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pt_popTriggerQueue; + } +} diff --git a/src/mem/protocol/MOESI_AMD_Base-msg.sm 
b/src/mem/protocol/MOESI_AMD_Base-msg.sm new file mode 100644 index 000000000..ff8842369 --- /dev/null +++ b/src/mem/protocol/MOESI_AMD_Base-msg.sm @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * Author: Lisa Hsu + */ + + +enumeration(CoherenceRequestType, desc="Coherence Request Types") { + // CPU Request Types ONLY + RdBlk, desc="Read Blk"; + RdBlkM, desc="Read Blk Modified"; + RdBlkS, desc="Read Blk Shared"; + CtoD, desc="Change To Dirty"; + VicClean, desc="L2 clean eviction"; + VicDirty, desc="L2 dirty eviction"; + Atomic, desc="Upper level atomic"; + AtomicWriteBack, desc="Upper level atomic"; + WriteThrough, desc="Ordered WriteThrough w/Data"; + WriteThroughFifo, desc="WriteThrough with no data"; + WriteThroughDummy, desc="WriteThrough with no data for atomic operation"; + WriteFlush, desc="Release Flush"; + + WrCancel, desc="want to cancel WB to Memory"; // should this be here? + + WBApproval, desc="WB Approval"; + + // Messages between Dir and R-Dir + ForceInv, desc="Send invalide to the block"; + ForceDowngrade, desc="Send downgrade to the block"; + Unblock, desc="Used to let the dir know a message has been sunk"; + + // Messages between R-Dir and R-Buffer + PrivateNotify, desc="Let region buffer know it has private access"; + SharedNotify, desc="Let region buffer know it has shared access"; + WbNotify, desc="Let region buffer know it saw its wb request"; + Downgrade, desc="Force the region buffer to downgrade to shared"; + // Response to R-Dir (probably should be on a different network, but + // I need it to be ordered with respect to requests) + InvAck, desc="Let the R-Dir know when the inv has occured"; + + PrivateRequest, desc="R-buf wants the region in private"; + UpgradeRequest, desc="R-buf wants the region in private"; + SharedRequest, desc="R-buf wants the region in shared (could respond with private)"; + CleanWbRequest, desc="R-buf wants to deallocate clean region"; + + NA, desc="So we don't get segfaults"; +} + +enumeration(ProbeRequestType, desc="Probe Request Types") { + PrbDowngrade, desc="Probe for Status"; // EtoS, MtoO, StoS + PrbInv, desc="Probe to Invalidate"; + + // For regions + PrbRepl, desc="Force the cache to do a 
replacement"; + PrbRegDowngrade, desc="Probe for Status"; // EtoS, MtoO, StoS + PrbAtomic, desc="Forwarded Atomic Operation"; +} + + +enumeration(CoherenceResponseType, desc="Coherence Response Types") { + NBSysResp, desc="Northbridge response to CPU Rd request"; + NBSysWBAck, desc="Northbridge response ok to WB"; + TDSysResp, desc="TCCdirectory response to CPU Rd request"; + TDSysWBAck, desc="TCCdirectory response ok to WB"; + TDSysWBNack, desc="TCCdirectory response ok to drop"; + CPUPrbResp, desc="CPU Probe Response"; + CPUData, desc="CPU Data"; + StaleNotif, desc="Notification of Stale WBAck, No data to writeback"; + CPUCancelWB, desc="want to cancel WB to Memory"; + MemData, desc="Data from Memory"; + + // for regions + PrivateAck, desc="Ack that r-buf received private notify"; + RegionWbAck, desc="Writeback Ack that r-buf completed deallocation"; + DirReadyAck, desc="Directory (mem ctrl)<->region dir handshake"; +} + +enumeration(CoherenceState, default="CoherenceState_NA", desc="Coherence State") { + Modified, desc="Modified"; + Owned, desc="Owned state"; + Exclusive, desc="Exclusive"; + Shared, desc="Shared"; + NA, desc="NA"; +} + +structure(CPURequestMsg, desc="...", interface="Message") { + Addr addr, desc="Physical address for this request"; + Addr DemandAddress, desc="Physical block address for this request"; + CoherenceRequestType Type, desc="Type of request"; + DataBlock DataBlk, desc="data for the cache line"; // only for WB + bool Dirty, desc="whether WB data is dirty"; // only for WB + MachineID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Multicast destination mask"; + bool Shared, desc="For CPU_WrVicBlk, vic is O not M. 
For CPU_ClVicBlk, vic is S"; + MessageSizeType MessageSize, desc="size category of the message"; + Cycles InitialRequestTime, desc="time the initial requests was sent from the L1Cache"; + Cycles ForwardRequestTime, desc="time the dir forwarded the request"; + Cycles ProbeRequestStartTime, desc="the time the dir started the probe request"; + bool DemandRequest, default="false", desc="For profiling purposes"; + + NetDest Sharers, desc="Caches that may have a valid copy of the data"; + bool ForceShared, desc="R-dir knows it is shared, pass on so it sends an S copy, not E"; + bool Private, default="false", desc="Requestor already has private permissions, no need for dir check"; + bool CtoDSinked, default="false", desc="This is true if the CtoD previously sent must have been sunk"; + + bool NoAckNeeded, default="false", desc="True if region buffer doesn't need to ack"; + int Acks, default="0", desc="Acks that the dir (mem ctrl) should expect to receive"; + CoherenceRequestType OriginalType, default="CoherenceRequestType_NA", desc="Type of request from core fwded through region buffer"; + WriteMask writeMask, desc="Write Through Data"; + MachineID WTRequestor, desc="Node who initiated the write through"; + HSAScope scope, default="HSAScope_SYSTEM", desc="Request Scope"; + int wfid, default="0", desc="wavefront id"; + bool NoWriteConflict, default="true", desc="write collided with CAB entry"; + int ProgramCounter, desc="PC that accesses to this block"; + + bool functionalRead(Packet *pkt) { + // Only PUTX messages contains the data block + if (Type == CoherenceRequestType:VicDirty) { + return testAndRead(addr, DataBlk, pkt); + } + + return false; + } + + bool functionalWrite(Packet *pkt) { + // No check on message type required since the protocol should + // read data from those messages that contain the block + return testAndWrite(addr, DataBlk, pkt); + } +} + +structure(NBProbeRequestMsg, desc="...", interface="Message") { + Addr addr, desc="Physical address for this 
request"; + ProbeRequestType Type, desc="NB_PrbNxtState signal"; + bool ReturnData, desc="Indicates CPU should return data"; + NetDest Destination, desc="Node to whom the data is sent"; + MessageSizeType MessageSize, desc="size category of the message"; + bool DemandRequest, default="false", desc="demand request, requesting 3-hop transfer"; + Addr DemandAddress, desc="Demand block address for a region request"; + MachineID Requestor, desc="Requestor id for 3-hop requests"; + bool NoAckNeeded, default="false", desc="For short circuting acks"; + int ProgramCounter, desc="PC that accesses to this block"; + + bool functionalRead(Packet *pkt) { + return false; + } + + bool functionalWrite(Packet *pkt) { + // This probe request declares no DataBlock field, so there is + // nothing for a functional write to update + return false; + } + +} + +structure(TDProbeRequestMsg, desc="...", interface="Message") { + Addr addr, desc="Physical address for this request"; + ProbeRequestType Type, desc="TD_PrbNxtState signal"; + bool ReturnData, desc="Indicates CPU should return data"; + bool localCtoD, desc="Indicates CtoD is within the GPU hierarchy (aka TCC subtree)"; + NetDest Destination, desc="Node to whom the data is sent"; + MessageSizeType MessageSize, desc="size category of the message"; + int Phase, desc="Synchronization Phase"; + int wfid, desc="wavefront id for Release"; + MachineID Requestor, desc="Node who initiated the request"; + + bool functionalRead(Packet *pkt) { + return false; + } + + bool functionalWrite(Packet *pkt) { + // This probe request declares no DataBlock field, so there is + // nothing for a functional write to update + return false; + } +} + +// Response Messages seemed to be easily munged into one type +structure(ResponseMsg, desc="...", interface="Message") { + Addr addr, desc="Physical address for this request"; + CoherenceResponseType Type, desc="NB Sys Resp or CPU Response to Probe"; + MachineID Sender, desc="Node who sent 
the data"; + NetDest Destination, desc="Node to whom the data is sent"; + // Begin Used Only By CPU Response + DataBlock DataBlk, desc="data for the cache line"; + bool Hit, desc="probe hit valid line"; + bool Shared, desc="True if S, or if NB Probe ReturnData==1 && O"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + bool Ntsl, desc="indicates probed lin will be invalid after probe"; + bool UntransferredOwner, desc="pending confirmation of ownership change"; + // End Used Only By CPU Response + + // Begin NB Response Only + CoherenceState State, default=CoherenceState_NA, desc="What returned data from NB should be in"; + bool CtoD, desc="was the originator a CtoD?"; + // End NB Response Only + + // Normally if a block gets hit by a probe while waiting to be written back, + // you flip the NbReqShared signal (part of the CPURequest signal group). + // But since this is in packets and I don't want to send a separate packet, + // let's just send this signal back with the data instead + bool NbReqShared, desc="modification of Shared field from initial request, e.g. 
hit by shared probe"; + + MessageSizeType MessageSize, desc="size category of the message"; + Cycles InitialRequestTime, desc="time the initial requests was sent from the L1Cache"; + Cycles ForwardRequestTime, desc="time the dir forwarded the request"; + Cycles ProbeRequestStartTime, desc="the time the dir started the probe request"; + bool DemandRequest, default="false", desc="For profiling purposes"; + + bool L3Hit, default="false", desc="Did memory or L3 supply the data?"; + MachineID OriginalResponder, desc="Mach which wrote the data to the L3"; + MachineID WTRequestor, desc="Node who started the writethrough"; + + bool NotCached, default="false", desc="True when the Region buffer has already evicted the line"; + + bool NoAckNeeded, default="false", desc="For short circuting acks"; + bool isValid, default="false", desc="Is acked block valid"; + int wfid, default="0", desc="wavefront id"; + int Phase, desc="Synchronization Phase"; + + int ProgramCounter, desc="PC that issues this request"; + bool mispred, desc="tell TCP if the block should not be bypassed"; + + + bool functionalRead(Packet *pkt) { + // Only PUTX messages contains the data block + if (Type == CoherenceResponseType:CPUData || + Type == CoherenceResponseType:MemData) { + return testAndRead(addr, DataBlk, pkt); + } + + return false; + } + + bool functionalWrite(Packet *pkt) { + // No check on message type required since the protocol should + // read data from those messages that contain the block + return testAndWrite(addr, DataBlk, pkt); + } +} + +structure(UnblockMsg, desc="...", interface="Message") { + Addr addr, desc="Physical address for this request"; + NetDest Destination, desc="Destination (always directory)"; + MessageSizeType MessageSize, desc="size category of the message"; + MachineID Sender, desc="Node who sent the data"; + bool currentOwner, default="false", desc="Is the sender the current owner"; + bool DoneAck, default="false", desc="Is this a done ack?"; + bool Dirty, 
default="false", desc="Was block dirty when evicted"; + bool wasValid, default="false", desc="Was block valid when evicted"; + bool valid, default="false", desc="Is block valid"; + bool validToInvalid, default="false", desc="Was block valid when evicted"; + + bool functionalRead(Packet *pkt) { + return false; + } + + bool functionalWrite(Packet *pkt) { + // No check on message type required since the protocol should + // read data from those messages that contain the block + return false; + } +} + +enumeration(TriggerType, desc="Trigger Type") { + L2_to_L1, desc="L2 to L1 fill"; + AcksComplete, desc="NB received all needed Acks"; + + // For regions + InvNext, desc="Invalidate the next block"; + PrivateAck, desc="Loopback ack for machines with no Region Buffer"; + AllOutstanding, desc="All outstanding requests have finished"; + L3Hit, desc="L3 hit in dir"; + + // For region directory once the directory is blocked + InvRegion, desc="Invalidate region"; + DowngradeRegion, desc="downgrade region"; + //For writethrough + UnblockWriteThrough, desc="unblock"; + WriteData, desc="Write to full cacheblock data"; + WriteDone, desc="Sequencer says that write is done"; + AtomicDone, desc="Atomic is done"; +} + +enumeration(CacheId, desc="Which Cache in the Core") { + L1I, desc="L1 I-cache"; + L1D0, desc="L1 D-cache cluster 0"; + L1D1, desc="L1 D-cache cluster 1"; + NA, desc="Default"; +} + +structure(TriggerMsg, desc="...", interface="Message") { + Addr addr, desc="Address"; + TriggerType Type, desc="Type of trigger"; + CacheId Dest, default="CacheId_NA", desc="Cache to invalidate"; + int ProgramCounter, desc="PC that accesses to this block"; + + bool functionalRead(Packet *pkt) { + return false; + } + + bool functionalWrite(Packet *pkt) { + // No check on message type required since the protocol should + // read data from those messages that contain the block + return false; + } + +} + +enumeration(FifoType, desc="Fifo Type") { + WriteDummy, desc="Dummy Write for atomic 
operation"; + WriteThrough, desc="simple writethrough request"; + WriteFlush, desc="synchronization message"; +} + +structure(FifoMsg, desc="...", interface="Message") { + Addr addr, desc="Address"; + FifoType Type, desc="WriteThrough/WriteFlush"; + int wfid, default="0",desc="wavefront id"; + MachineID Requestor, desc="Flush Requestor"; + MachineID oRequestor, desc="original Flush Requestor"; + + bool functionalRead(Packet *pkt) { + return false; + } + + bool functionalWrite(Packet *pkt) { + // No check on message type required since the protocol should + // read data from those messages that contain the block + return false; + } + +} diff --git a/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm b/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm new file mode 100644 index 000000000..f545c2fa7 --- /dev/null +++ b/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm @@ -0,0 +1,1408 @@ +/* + * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Lisa Hsu, Sooraj Puthoor + */ + +/* + * This file is based on MOESI_AMD_Base.sm + * Differences with AMD base protocol + * -- Uses a probe filter memory to track sharers. + * -- The probe filter can be inclusive or non-inclusive + * -- Only two sharers tracked. 
Sharers are a) GPU and/or b) CPU + * -- If sharer information is available, the sharer is probed + * -- If sharer information is not available, probes are broadcast + */ + +machine(MachineType:Directory, "AMD Baseline protocol") +: DirectoryMemory * directory; + CacheMemory * L3CacheMemory; + CacheMemory * ProbeFilterMemory; + Cycles response_latency := 5; + Cycles l3_hit_latency := 50; + bool noTCCdir := "False"; + bool CAB_TCC := "False"; + int TCC_select_num_bits:=1; + bool useL3OnWT := "False"; + bool inclusiveDir := "True"; + Cycles to_memory_controller_latency := 1; + + // From the Cores + MessageBuffer * requestFromCores, network="From", virtual_network="0", ordered="false", vnet_type="request"; + MessageBuffer * responseFromCores, network="From", virtual_network="2", ordered="false", vnet_type="response"; + MessageBuffer * unblockFromCores, network="From", virtual_network="4", ordered="false", vnet_type="unblock"; + + MessageBuffer * probeToCore, network="To", virtual_network="0", ordered="false", vnet_type="request"; + MessageBuffer * responseToCore, network="To", virtual_network="2", ordered="false", vnet_type="response"; + + MessageBuffer * triggerQueue, ordered="true"; + MessageBuffer * L3triggerQueue, ordered="true"; + MessageBuffer * responseFromMemory; +{ + // STATES + state_declaration(State, desc="Directory states", default="Directory_State_U") { + U, AccessPermission:Backing_Store, desc="unblocked"; + BL, AccessPermission:Busy, desc="got L3 WB request"; + // BL is Busy because it is busy waiting for the data + // which is possibly in the network. 
The cache which evicted the data + // might have moved to some other state after doing the eviction + // BS==> Received a read request; has not requested ownership + // B==> Received a read request; has requested ownership + // BM==> Received a modification request + B_P, AccessPermission:Backing_Store, desc="Back invalidation, waiting for probes"; + BS_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; + BM_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; + B_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; + BP, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory"; + BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; + BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; + B_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; + BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; + B, AccessPermission:Backing_Store, desc="sent response, Blocked til ack"; + } + + // Events + enumeration(Event, desc="Directory events") { + // CPU requests + RdBlkS, desc="..."; + RdBlkM, desc="..."; + RdBlk, desc="..."; + CtoD, desc="..."; + WriteThrough, desc="WriteThrough Message"; + Atomic, desc="Atomic Message"; + + // writebacks + VicDirty, desc="..."; + VicClean, desc="..."; + CPUData, desc="WB data from CPU"; + StaleWB, desc="Notification that WB has been superceded by a probe"; + + // probe responses + CPUPrbResp, desc="Probe Response Msg"; + + ProbeAcksComplete, desc="Probe Acks Complete"; + + L3Hit, desc="Hit in L3 return data to core"; + + // Replacement + PF_Repl, desc="Replace address from probe filter"; + + // Memory Controller + MemData, desc="Fetched 
data from memory arrives"; + WBAck, desc="Writeback Ack from memory arrives"; + + CoreUnblock, desc="Core received data, unblock"; + UnblockWriteThrough, desc="Unblock because of writethrough request finishing"; + + StaleVicDirty, desc="Core invalidated before VicDirty processed"; + } + + enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { + L3DataArrayRead, desc="Read the data array"; + L3DataArrayWrite, desc="Write the data array"; + L3TagArrayRead, desc="Read the data array"; + L3TagArrayWrite, desc="Write the data array"; + + PFTagArrayRead, desc="Read the data array"; + PFTagArrayWrite, desc="Write the data array"; + } + + // TYPES + + enumeration(ProbeFilterState, desc="") { + T, desc="Tracked"; + NT, desc="Not tracked"; + B, desc="Blocked, This entry is being replaced"; + } + + // DirectoryEntry + structure(Entry, desc="...", interface="AbstractEntry") { + State DirectoryState, desc="Directory state"; + DataBlock DataBlk, desc="data for the block"; + NetDest VicDirtyIgnore, desc="VicDirty coming from whom to ignore"; + } + + structure(CacheEntry, desc="...", interface="AbstractCacheEntry") { + DataBlock DataBlk, desc="data for the block"; + MachineID LastSender, desc="Mach which this block came from"; + ProbeFilterState pfState, desc="ProbeFilter state",default="Directory_ProbeFilterState_NT"; + bool isOnCPU, desc="Block valid in the CPU complex",default="false"; + bool isOnGPU, desc="Block valid in the GPU complex",default="false"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block"; + bool Dirty, desc="Is the data dirty?"; + int NumPendingAcks, desc="num acks expected"; + MachineID OriginalRequestor, desc="Original Requestor"; + MachineID WTRequestor, desc="WT Requestor"; + bool Cached, desc="data hit in Cache"; + bool MemData, desc="Got MemData?",default="false"; + bool wtData, desc="Got write through data?",default="false"; + bool atomicData, 
desc="Got Atomic op?",default="false"; + Cycles InitialRequestTime, desc="..."; + Cycles ForwardRequestTime, desc="..."; + Cycles ProbeRequestStartTime, desc="..."; + MachineID LastSender, desc="Mach which this block came from"; + bool L3Hit, default="false", desc="Was this an L3 hit?"; + uint64_t probe_id, desc="probe id for lifetime profiling"; + WriteMask writeMask, desc="outstanding write through mask"; + Addr demandAddress, desc="Address of demand request which caused probe filter eviction"; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs"; + + int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + + Tick clockEdge(); + Tick cyclesToTicks(Cycles c); + + void set_tbe(TBE a); + void unset_tbe(); + void wakeUpAllBuffers(); + void wakeUpBuffers(Addr a); + Cycles curCycle(); + + Entry getDirectoryEntry(Addr addr), return_by_pointer="yes" { + Entry dir_entry := static_cast(Entry, "pointer", directory.lookup(addr)); + + if (is_valid(dir_entry)) { + //DPRINTF(RubySlicc, "Getting entry %s: %s\n", addr, dir_entry.DataBlk); + return dir_entry; + } + + dir_entry := static_cast(Entry, "pointer", + directory.allocate(addr, new Entry)); + return dir_entry; + } + + DataBlock getDataBlock(Addr addr), return_by_ref="yes" { + TBE tbe := TBEs.lookup(addr); + if (is_valid(tbe) && tbe.MemData) { + DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe); + return tbe.DataBlk; + } + DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr)); + return getDirectoryEntry(addr).DataBlk; + } + + State getState(TBE tbe, CacheEntry entry, Addr addr) { + CacheEntry probeFilterEntry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(addr)); + if (inclusiveDir) { + if (is_valid(probeFilterEntry) && probeFilterEntry.pfState == ProbeFilterState:B) { + return 
State:B_P; + } + } + return getDirectoryEntry(addr).DirectoryState; + } + + void setState(TBE tbe, CacheEntry entry, Addr addr, State state) { + getDirectoryEntry(addr).DirectoryState := state; + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + functionalMemoryRead(pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs.lookup(addr); + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + } + + num_functional_writes := num_functional_writes + + functionalMemoryWrite(pkt); + return num_functional_writes; + } + + AccessPermission getAccessPermission(Addr addr) { + // For this Directory, all permissions are just tracked in Directory, since + // it's not possible to have something in TBE but not Dir, just keep track + // of state all in one place. + if (directory.isPresent(addr)) { + return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState); + } + + return AccessPermission:NotPresent; + } + + void setAccessPermission(CacheEntry entry, Addr addr, State state) { + getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state)); + } + + void recordRequestType(RequestType request_type, Addr addr) { + if (request_type == RequestType:L3DataArrayRead) { + L3CacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:L3DataArrayWrite) { + L3CacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:L3TagArrayRead) { + L3CacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:L3TagArrayWrite) { + L3CacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } else if (request_type == RequestType:PFTagArrayRead) { + 
ProbeFilterMemory.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:PFTagArrayWrite) { + ProbeFilterMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } + } + + bool checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:L3DataArrayRead) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L3DataArrayWrite) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:L3TagArrayRead) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:L3TagArrayWrite) { + return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:PFTagArrayRead) { + return ProbeFilterMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:PFTagArrayWrite) { + return ProbeFilterMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } + } + + bool isNotPresentProbeFilter(Addr address) { + if (ProbeFilterMemory.isTagPresent(address) || + ProbeFilterMemory.cacheAvail(address)) { + return false; + } + return true; + } + + bool isGPUSharer(Addr address) { + assert(ProbeFilterMemory.isTagPresent(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address)); + if (entry.pfState == ProbeFilterState:NT) { + return true; + } else if (entry.isOnGPU){ + return true; + } + return false; + } + + bool isCPUSharer(Addr address) { + assert(ProbeFilterMemory.isTagPresent(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address)); + if (entry.pfState == ProbeFilterState:NT) { + return true; + } else if (entry.isOnCPU){ + 
return true; + } + return false; + } + + + // ** OUT_PORTS ** + out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore); + out_port(responseNetwork_out, ResponseMsg, responseToCore); + + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + out_port(L3TriggerQueue_out, TriggerMsg, L3triggerQueue); + + // ** IN_PORTS ** + + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=5) { + if (triggerQueue_in.isReady(clockEdge())) { + peek(triggerQueue_in, TriggerMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == TriggerType:AcksComplete) { + trigger(Event:ProbeAcksComplete, in_msg.addr, entry, tbe); + }else if (in_msg.Type == TriggerType:UnblockWriteThrough) { + trigger(Event:UnblockWriteThrough, in_msg.addr, entry, tbe); + } else { + error("Unknown trigger msg"); + } + } + } + } + + in_port(L3TriggerQueue_in, TriggerMsg, L3triggerQueue, rank=4) { + if (L3TriggerQueue_in.isReady(clockEdge())) { + peek(L3TriggerQueue_in, TriggerMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == TriggerType:L3Hit) { + trigger(Event:L3Hit, in_msg.addr, entry, tbe); + } else { + error("Unknown trigger msg"); + } + } + } + } + + // Unblock Network + in_port(unblockNetwork_in, UnblockMsg, unblockFromCores, rank=3) { + if (unblockNetwork_in.isReady(clockEdge())) { + peek(unblockNetwork_in, UnblockMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + trigger(Event:CoreUnblock, in_msg.addr, entry, tbe); + } + } + } + + // Core response network + in_port(responseNetwork_in, ResponseMsg, responseFromCores, rank=2) { + if (responseNetwork_in.isReady(clockEdge())) { + peek(responseNetwork_in, ResponseMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := 
static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == CoherenceResponseType:CPUPrbResp) { + trigger(Event:CPUPrbResp, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceResponseType:CPUData) { + trigger(Event:CPUData, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceResponseType:StaleNotif) { + trigger(Event:StaleWB, in_msg.addr, entry, tbe); + } else { + error("Unexpected response type"); + } + } + } + } + + // off-chip memory request/response is done + in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=1) { + if (memQueue_in.isReady(clockEdge())) { + peek(memQueue_in, MemoryMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (in_msg.Type == MemoryRequestType:MEMORY_READ) { + trigger(Event:MemData, in_msg.addr, entry, tbe); + DPRINTF(RubySlicc, "%s\n", in_msg); + } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) { + trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them. 
+ } else { + DPRINTF(RubySlicc, "%s\n", in_msg.Type); + error("Invalid message"); + } + } + } + } + + in_port(requestNetwork_in, CPURequestMsg, requestFromCores, rank=0) { + if (requestNetwork_in.isReady(clockEdge())) { + peek(requestNetwork_in, CPURequestMsg) { + TBE tbe := TBEs.lookup(in_msg.addr); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); + if (inclusiveDir && isNotPresentProbeFilter(in_msg.addr)) { + Addr victim := ProbeFilterMemory.cacheProbe(in_msg.addr); + tbe := TBEs.lookup(victim); + entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(victim)); + trigger(Event:PF_Repl, victim, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlk) { + trigger(Event:RdBlk, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkS) { + trigger(Event:RdBlkS, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { + trigger(Event:RdBlkM, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:WriteThrough) { + trigger(Event:WriteThrough, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:Atomic) { + trigger(Event:Atomic, in_msg.addr, entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:VicDirty) { + if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { + DPRINTF(RubySlicc, "Dropping VicDirty for address %s\n", in_msg.addr); + trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe); + } else { + DPRINTF(RubySlicc, "Got VicDirty from %s on %s\n", in_msg.Requestor, in_msg.addr); + trigger(Event:VicDirty, in_msg.addr, entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:VicClean) { + if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { + DPRINTF(RubySlicc, "Dropping VicClean for address %s\n", in_msg.addr); + trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe); + } else { + DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", 
in_msg.Requestor, in_msg.addr); + trigger(Event:VicClean, in_msg.addr, entry, tbe); + } + } else { + error("Bad request message type"); + } + } + } + } + + // Actions + action(s_sendResponseS, "s", desc="send Shared response") { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := false; + out_msg.State := CoherenceState:Shared; + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := tbe.ForwardRequestTime; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + out_msg.L3Hit := tbe.L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(es_sendResponseES, "es", desc="send Exclusive or Shared response") { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := tbe.Dirty; + if (tbe.Cached) { + out_msg.State := CoherenceState:Shared; + } else { + out_msg.State := CoherenceState:Exclusive; + } + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := tbe.ForwardRequestTime; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + out_msg.L3Hit := tbe.L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + // write-through and atomics do not 
send an unblock ack back to the + // directory. Hence, directory has to generate a self unblocking + // message. Additionally, write through does not require data + // in its response. Hence, write through is treated separately from + // write-back and atomics + action(m_sendResponseM, "m", desc="send Modified response") { + if (tbe.wtData) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:UnblockWriteThrough; + } + }else{ + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + if (tbe.L3Hit) { + out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); + } else { + out_msg.Sender := machineID; + } + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := tbe.Dirty; + out_msg.State := CoherenceState:Modified; + out_msg.CtoD := false; + out_msg.InitialRequestTime := tbe.InitialRequestTime; + out_msg.ForwardRequestTime := tbe.ForwardRequestTime; + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.OriginalResponder := tbe.LastSender; + if(tbe.atomicData){ + out_msg.WTRequestor := tbe.WTRequestor; + } + out_msg.L3Hit := tbe.L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + if (tbe.atomicData) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:UnblockWriteThrough; + } + } + } + } + + action(c_sendResponseCtoD, "c", desc="send CtoD Ack") { + enqueue(responseNetwork_out, ResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysResp; + out_msg.Sender := machineID; + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.Dirty := false; + out_msg.State := CoherenceState:Modified; + out_msg.CtoD := true; + out_msg.InitialRequestTime := 
tbe.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + + action(w_sendResponseWBAck, "w", desc="send WB Ack") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, 1) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:NBSysWBAck; + out_msg.Destination.add(in_msg.Requestor); + out_msg.WTRequestor := in_msg.WTRequestor; + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := curCycle(); + out_msg.ProbeRequestStartTime := curCycle(); + } + } + } + + action(l_queueMemWBReq, "lq", desc="Write WB data to memory") { + peek(responseNetwork_in, ResponseMsg) { + queueMemoryWrite(machineID, address, to_memory_controller_latency, + in_msg.DataBlk); + } + } + + action(l_queueMemRdReq, "lr", desc="Read data from memory") { + peek(requestNetwork_in, CPURequestMsg) { + if (L3CacheMemory.isTagPresent(address)) { + enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L3Hit; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + tbe.DataBlk := entry.DataBlk; + tbe.LastSender := entry.LastSender; + tbe.L3Hit := true; + tbe.MemData := true; + L3CacheMemory.deallocate(address); + } else { + queueMemoryRead(machineID, address, to_memory_controller_latency); + } + } + } + + action(dc_probeInvCoreData, "dc", desc="probe inv cores, return data") { + peek(requestNetwork_in, CPURequestMsg) { + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + if(isCPUSharer(address)) { + 
out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + } + + // add relevant TCC node to list. This replaces all TCPs and SQCs + if(isGPUSharer(address)) { + if ((in_msg.Type == CoherenceRequestType:WriteThrough || + in_msg.Type == CoherenceRequestType:Atomic) && + in_msg.NoWriteConflict) { + // Don't Include TCCs unless there was write-CAB conflict in the TCC + } else if(noTCCdir) { + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + } else { + out_msg.Destination.add(map_Address_to_TCCdir(address)); + } + } + out_msg.Destination.remove(in_msg.Requestor); + tbe.NumPendingAcks := out_msg.Destination.count(); + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + DPRINTF(RubySlicc, "%s\n", out_msg); + APPEND_TRANSITION_COMMENT(" dc: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + + action(bp_backProbe, "bp", desc="back probe") { + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + if(isCPUSharer(address)) { + // won't be realistic for multisocket + out_msg.Destination.broadcast(MachineType:CorePair); + } + // add relevant TCC node to the list. 
This replaces all TCPs and SQCs + if(isGPUSharer(address)) { + if (noTCCdir) { + //Don't need to notify TCC about reads + } else { + out_msg.Destination.add(map_Address_to_TCCdir(address)); + tbe.NumPendingAcks := tbe.NumPendingAcks + 1; + } + if (noTCCdir && CAB_TCC) { + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + } + } + tbe.NumPendingAcks := out_msg.Destination.count(); + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + DPRINTF(RubySlicc, "%s\n", (out_msg)); + APPEND_TRANSITION_COMMENT(" sc: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + APPEND_TRANSITION_COMMENT(" - back probe"); + tbe.ProbeRequestStartTime := curCycle(); + } + } + + action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") { + peek(requestNetwork_in, CPURequestMsg) { // not the right network? + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbDowngrade; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + if(isCPUSharer(address)) { + out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + } + // add relevant TCC node to the list. 
This replaces all TCPs and SQCs + if(isGPUSharer(address)) { + if (noTCCdir) { + //Don't need to notify TCC about reads + } else { + out_msg.Destination.add(map_Address_to_TCCdir(address)); + tbe.NumPendingAcks := tbe.NumPendingAcks + 1; + } + if (noTCCdir && CAB_TCC) { + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + } + } + out_msg.Destination.remove(in_msg.Requestor); + tbe.NumPendingAcks := out_msg.Destination.count(); + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + DPRINTF(RubySlicc, "%s\n", (out_msg)); + APPEND_TRANSITION_COMMENT(" sc: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + + action(ic_probeInvCore, "ic", desc="probe invalidate core, no return data needed") { + peek(requestNetwork_in, CPURequestMsg) { // not the right network? + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := false; + out_msg.MessageSize := MessageSizeType:Control; + if(isCPUSharer(address)) { + out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + } + + // add relevant TCC node to the list. 
This replaces all TCPs and SQCs + if(isGPUSharer(address)) { + if (noTCCdir) { + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + } else { + out_msg.Destination.add(map_Address_to_TCCdir(address)); + } + } + out_msg.Destination.remove(in_msg.Requestor); + tbe.NumPendingAcks := out_msg.Destination.count(); + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + APPEND_TRANSITION_COMMENT(" ic: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + DPRINTF(RubySlicc, "%s\n", out_msg); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + + action(sm_setMRU, "sm", desc="set probe filter entry as MRU") { + ProbeFilterMemory.setMRU(address); + } + + action(d_writeDataToMemory, "d", desc="Write data to memory") { + peek(responseNetwork_in, ResponseMsg) { + getDirectoryEntry(address).DataBlk := in_msg.DataBlk; + DPRINTF(RubySlicc, "Writing Data: %s to address %s\n", in_msg.DataBlk, + in_msg.addr); + } + } + + action(te_allocateTBEForEviction, "te", desc="allocate TBE Entry") { + check_allocate(TBEs); + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + tbe.writeMask.clear(); + tbe.wtData := false; + tbe.atomicData := false; + tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs + tbe.Dirty := false; + tbe.NumPendingAcks := 0; + } + + action(t_allocateTBE, "t", desc="allocate TBE Entry") { + check_allocate(TBEs); + peek(requestNetwork_in, CPURequestMsg) { + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + tbe.writeMask.clear(); + tbe.writeMask.orMask(in_msg.writeMask); + tbe.wtData := true; + tbe.WTRequestor := in_msg.WTRequestor; + tbe.LastSender := in_msg.Requestor; + } + if (in_msg.Type == CoherenceRequestType:Atomic) { + tbe.writeMask.clear(); + tbe.writeMask.orMask(in_msg.writeMask); + 
tbe.atomicData := true; + tbe.WTRequestor := in_msg.WTRequestor; + tbe.LastSender := in_msg.Requestor; + } + tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs + tbe.Dirty := false; + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + tbe.DataBlk.copyPartial(in_msg.DataBlk,tbe.writeMask); + tbe.Dirty := false; + } + tbe.OriginalRequestor := in_msg.Requestor; + tbe.NumPendingAcks := 0; + tbe.Cached := in_msg.ForceShared; + tbe.InitialRequestTime := in_msg.InitialRequestTime; + } + } + + action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") { + if (tbe.Dirty == false) { + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + } + TBEs.deallocate(address); + unset_tbe(); + } + + action(wd_writeBackData, "wd", desc="Write back data if needed") { + if (tbe.wtData) { + DataBlock tmp := getDirectoryEntry(address).DataBlk; + tmp.copyPartial(tbe.DataBlk,tbe.writeMask); + tbe.DataBlk := tmp; + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + } else if (tbe.atomicData) { + tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk, + tbe.writeMask); + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + } else if (tbe.Dirty == false) { + getDirectoryEntry(address).DataBlk := tbe.DataBlk; + } + } + + action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") { + peek(memQueue_in, MemoryMsg) { + if (tbe.wtData == true) { + // DO Nothing (already have the directory data) + } else if (tbe.Dirty == false) { + tbe.DataBlk := getDirectoryEntry(address).DataBlk; + } + tbe.MemData := true; + } + } + + action(y_writeProbeDataToTBE, "y", desc="write Probe Data to TBE") { + peek(responseNetwork_in, ResponseMsg) { + if (in_msg.Dirty) { + DPRINTF(RubySlicc, "Got dirty data for %s from %s\n", address, in_msg.Sender); + DPRINTF(RubySlicc, "Data is %s\n", in_msg.DataBlk); + if (tbe.wtData) { + DataBlock tmp := in_msg.DataBlk; + tmp.copyPartial(tbe.DataBlk,tbe.writeMask); + tbe.DataBlk := tmp; + } else if (tbe.Dirty) { + if(tbe.atomicData == false && 
tbe.wtData == false) { + DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender); + assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data + } + } else { + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty; + tbe.LastSender := in_msg.Sender; + } + } + if (in_msg.Hit) { + tbe.Cached := true; + } + } + } + + action(mwc_markSinkWriteCancel, "mwc", desc="Mark to sink impending VicDirty") { + peek(responseNetwork_in, ResponseMsg) { + DPRINTF(RubySlicc, "Write cancel bit set on address %s\n", address); + getDirectoryEntry(address).VicDirtyIgnore.add(in_msg.Sender); + APPEND_TRANSITION_COMMENT(" setting bit to sink VicDirty "); + } + } + + action(x_decrementAcks, "x", desc="decrement Acks pending") { + tbe.NumPendingAcks := tbe.NumPendingAcks - 1; + APPEND_TRANSITION_COMMENT(" Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + } + + action(o_checkForCompletion, "o", desc="check for ack completion") { + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + APPEND_TRANSITION_COMMENT(" Check: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + } + + action(rv_removeVicDirtyIgnore, "rv", desc="Remove ignored core") { + peek(requestNetwork_in, CPURequestMsg) { + getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor); + } + } + + action(al_allocateL3Block, "al", desc="allocate the L3 block on WB") { + peek(responseNetwork_in, ResponseMsg) { + if (L3CacheMemory.isTagPresent(address)) { + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) "); + entry.DataBlk := in_msg.DataBlk; + entry.LastSender := in_msg.Sender; + } else { + if (L3CacheMemory.cacheAvail(address) == false) { + Addr victim := L3CacheMemory.cacheProbe(address); + CacheEntry victim_entry := static_cast(CacheEntry, "pointer", + 
L3CacheMemory.lookup(victim)); + queueMemoryWrite(machineID, victim, to_memory_controller_latency, + victim_entry.DataBlk); + L3CacheMemory.deallocate(victim); + } + assert(L3CacheMemory.cacheAvail(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 "); + entry.DataBlk := in_msg.DataBlk; + + entry.LastSender := in_msg.Sender; + } + } + } + + action(alwt_allocateL3BlockOnWT, "alwt", desc="allocate the L3 block on WT") { + if ((tbe.wtData || tbe.atomicData) && useL3OnWT) { + if (L3CacheMemory.isTagPresent(address)) { + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) "); + entry.DataBlk := tbe.DataBlk; + entry.LastSender := tbe.LastSender; + } else { + if (L3CacheMemory.cacheAvail(address) == false) { + Addr victim := L3CacheMemory.cacheProbe(address); + CacheEntry victim_entry := static_cast(CacheEntry, "pointer", + L3CacheMemory.lookup(victim)); + queueMemoryWrite(machineID, victim, to_memory_controller_latency, + victim_entry.DataBlk); + L3CacheMemory.deallocate(victim); + } + assert(L3CacheMemory.cacheAvail(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry)); + APPEND_TRANSITION_COMMENT(" al wrote data to L3 "); + entry.DataBlk := tbe.DataBlk; + entry.LastSender := tbe.LastSender; + } + } + } + + action(apf_allocateProbeFilterEntry, "apf", desc="Allocate probe filte entry") { + if (!ProbeFilterMemory.isTagPresent(address)) { + if (inclusiveDir) { + assert(ProbeFilterMemory.cacheAvail(address)); + } else if (ProbeFilterMemory.cacheAvail(address) == false) { + Addr victim := ProbeFilterMemory.cacheProbe(address); + ProbeFilterMemory.deallocate(victim); + } + assert(ProbeFilterMemory.cacheAvail(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.allocate(address, 
new CacheEntry)); + APPEND_TRANSITION_COMMENT(" allocating a new probe filter entry"); + entry.pfState := ProbeFilterState:NT; + if (inclusiveDir) { + entry.pfState := ProbeFilterState:T; + } + entry.isOnCPU := false; + entry.isOnGPU := false; + } + } + + action(mpfe_markPFEntryForEviction, "mpfe", desc="Mark this PF entry is being evicted") { + assert(ProbeFilterMemory.isTagPresent(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address)); + entry.pfState := ProbeFilterState:B; + peek(requestNetwork_in, CPURequestMsg) { + tbe.demandAddress := in_msg.addr; + } + } + + action(we_wakeUpEvictionDependents, "we", desc="Wake up requests waiting for demand address and victim address") { + wakeUpBuffers(address); + wakeUpBuffers(tbe.demandAddress); + } + + action(dpf_deallocateProbeFilter, "dpf", desc="deallocate PF entry") { + assert(ProbeFilterMemory.isTagPresent(address)); + ProbeFilterMemory.deallocate(address); + } + + action(upf_updateProbeFilter, "upf", desc="") { + peek(requestNetwork_in, CPURequestMsg) { + assert(ProbeFilterMemory.isTagPresent(address)); + CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address)); + if (in_msg.Type == CoherenceRequestType:WriteThrough) { + entry.pfState := ProbeFilterState:T; + entry.isOnCPU := false; + entry.isOnGPU := false; + } else if (in_msg.Type == CoherenceRequestType:Atomic) { + entry.pfState := ProbeFilterState:T; + entry.isOnCPU := false; + entry.isOnGPU := false; + } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { + entry.pfState := ProbeFilterState:T; + entry.isOnCPU := false; + entry.isOnGPU := false; + } else if (in_msg.Type == CoherenceRequestType:CtoD) { + entry.pfState := ProbeFilterState:T; + entry.isOnCPU := false; + entry.isOnGPU := false; + } + if(machineIDToMachineType(in_msg.Requestor) == MachineType:CorePair) { + entry.isOnCPU := true; + } else { + entry.isOnGPU := true; + } + } + } + + action(rmcd_removeSharerConditional, 
"rmcd", desc="remove sharer from probe Filter, conditional") { + peek(requestNetwork_in, CPURequestMsg) { + if (ProbeFilterMemory.isTagPresent(address)) { + CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address)); + if(machineIDToMachineType(in_msg.Requestor) == MachineType:CorePair) {//CorePair has inclusive L2 + if (in_msg.Type == CoherenceRequestType:VicDirty) { + entry.isOnCPU := false; + } else if (in_msg.Type == CoherenceRequestType:VicClean) { + entry.isOnCPU := false; + } + } + } + } + } + + action(sf_setForwardReqTime, "sf", desc="...") { + tbe.ForwardRequestTime := curCycle(); + } + + action(dl_deallocateL3, "dl", desc="deallocate the L3 block") { + L3CacheMemory.deallocate(address); + } + + action(p_popRequestQueue, "p", desc="pop request queue") { + requestNetwork_in.dequeue(clockEdge()); + } + + action(pr_popResponseQueue, "pr", desc="pop response queue") { + responseNetwork_in.dequeue(clockEdge()); + } + + action(pm_popMemQueue, "pm", desc="pop mem queue") { + memQueue_in.dequeue(clockEdge()); + } + + action(pt_popTriggerQueue, "pt", desc="pop trigger queue") { + triggerQueue_in.dequeue(clockEdge()); + } + + action(ptl_popTriggerQueue, "ptl", desc="pop L3 trigger queue") { + L3TriggerQueue_in.dequeue(clockEdge()); + } + + action(pu_popUnblockQueue, "pu", desc="pop unblock queue") { + unblockNetwork_in.dequeue(clockEdge()); + } + + action(zz_recycleRequestQueue, "zz", desc="recycle request queue") { + requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + action(yy_recycleResponseQueue, "yy", desc="recycle response queue") { + responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); + } + + action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") { + stall_and_wait(requestNetwork_in, address); + } + + action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") { + wakeUpBuffers(address); + } + + action(wa_wakeUpAllDependents, "waa", 
desc="Wake up any requests waiting for this region") { + wakeUpAllBuffers(); + } + + action(z_stall, "z", desc="...") { + } + + // TRANSITIONS + transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) { + st_stallAndWaitRequest; + } + + // It may be possible to save multiple invalidations here! + transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, {Atomic, WriteThrough}) { + st_stallAndWaitRequest; + } + + + // transitions from U + transition(U, PF_Repl, B_P) {PFTagArrayRead, PFTagArrayWrite}{ + te_allocateTBEForEviction; + apf_allocateProbeFilterEntry; + bp_backProbe; + sm_setMRU; + mpfe_markPFEntryForEviction; + } + + transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} { + t_allocateTBE; + apf_allocateProbeFilterEntry; + l_queueMemRdReq; + sc_probeShrCoreData; + sm_setMRU; + upf_updateProbeFilter; + p_popRequestQueue; + } + + transition(U, WriteThrough, BM_PM) {L3TagArrayRead, L3TagArrayWrite, PFTagArrayRead, PFTagArrayWrite} { + t_allocateTBE; + apf_allocateProbeFilterEntry; + w_sendResponseWBAck; + l_queueMemRdReq; + dc_probeInvCoreData; + sm_setMRU; + upf_updateProbeFilter; + p_popRequestQueue; + } + + transition(U, Atomic, BM_PM) {L3TagArrayRead, L3TagArrayWrite, PFTagArrayRead, PFTagArrayWrite} { + t_allocateTBE; + apf_allocateProbeFilterEntry; + l_queueMemRdReq; + dc_probeInvCoreData; + sm_setMRU; + upf_updateProbeFilter; + p_popRequestQueue; + } + + transition(U, {RdBlkM}, BM_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} { + t_allocateTBE; + apf_allocateProbeFilterEntry; + l_queueMemRdReq; + dc_probeInvCoreData; + sm_setMRU; + upf_updateProbeFilter; + p_popRequestQueue; + } + + transition(U, RdBlk, B_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite}{ + t_allocateTBE; + apf_allocateProbeFilterEntry; + l_queueMemRdReq; + sc_probeShrCoreData; + sm_setMRU; + upf_updateProbeFilter; + p_popRequestQueue; + } + + transition(U, 
CtoD, BP) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} { + t_allocateTBE; + apf_allocateProbeFilterEntry; + ic_probeInvCore; + sm_setMRU; + upf_updateProbeFilter; + p_popRequestQueue; + } + + transition(U, VicDirty, BL) {L3TagArrayRead} { + t_allocateTBE; + w_sendResponseWBAck; + rmcd_removeSharerConditional; + p_popRequestQueue; + } + + transition(U, VicClean, BL) {L3TagArrayRead} { + t_allocateTBE; + w_sendResponseWBAck; + rmcd_removeSharerConditional; + p_popRequestQueue; + } + + transition(BL, {VicDirty, VicClean}) { + zz_recycleRequestQueue; + } + + transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} { + d_writeDataToMemory; + al_allocateL3Block; + wa_wakeUpDependents; + dt_deallocateTBE; + //l_queueMemWBReq; // why need an ack? esp. with DRAMSim, just put it in queue no ack needed + pr_popResponseQueue; + } + + transition(BL, StaleWB, U) {L3TagArrayWrite} { + dt_deallocateTBE; + wa_wakeUpAllDependents; + pr_popResponseQueue; + } + + transition({B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P}, {VicDirty, VicClean}) { + z_stall; + } + + transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, WBAck) { + pm_popMemQueue; + } + + transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, PF_Repl) { + zz_recycleRequestQueue; + } + + transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, StaleVicDirty) { + rv_removeVicDirtyIgnore; + w_sendResponseWBAck; + p_popRequestQueue; + } + + transition({B}, CoreUnblock, U) { + wa_wakeUpDependents; + pu_popUnblockQueue; + } + + transition(B, UnblockWriteThrough, U) { + wa_wakeUpDependents; + pt_popTriggerQueue; + } + + transition(BS_PM, MemData, BS_Pm) {} { + mt_writeMemDataToTBE; + pm_popMemQueue; + } + + transition(BM_PM, MemData, BM_Pm){} { + mt_writeMemDataToTBE; + pm_popMemQueue; + } + + transition(B_PM, MemData, B_Pm){} { + mt_writeMemDataToTBE; + pm_popMemQueue; + } + + transition(BS_PM, 
L3Hit, BS_Pm) {} { + ptl_popTriggerQueue; + } + + transition(BM_PM, L3Hit, BM_Pm) {} { + ptl_popTriggerQueue; + } + + transition(B_PM, L3Hit, B_Pm) {} { + ptl_popTriggerQueue; + } + + transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { + mt_writeMemDataToTBE; + s_sendResponseS; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pm_popMemQueue; + } + + transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { + mt_writeMemDataToTBE; + m_sendResponseM; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pm_popMemQueue; + } + + transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { + mt_writeMemDataToTBE; + es_sendResponseES; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pm_popMemQueue; + } + + transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} { + s_sendResponseS; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + ptl_popTriggerQueue; + } + + transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} { + m_sendResponseM; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + ptl_popTriggerQueue; + } + + transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} { + es_sendResponseES; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + ptl_popTriggerQueue; + } + + transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, BP}, CPUPrbResp) { + y_writeProbeDataToTBE; + x_decrementAcks; + o_checkForCompletion; + pr_popResponseQueue; + } + + transition(BS_PM, ProbeAcksComplete, BS_M) {} { + sf_setForwardReqTime; + pt_popTriggerQueue; + } + + transition(BM_PM, ProbeAcksComplete, BM_M) {} { + sf_setForwardReqTime; + pt_popTriggerQueue; + } + + transition(B_PM, ProbeAcksComplete, B_M){} { + sf_setForwardReqTime; + pt_popTriggerQueue; + } + + transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { + sf_setForwardReqTime; + s_sendResponseS; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + 
dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { + sf_setForwardReqTime; + m_sendResponseM; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { + sf_setForwardReqTime; + es_sendResponseES; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(B_P, ProbeAcksComplete, U) { + wd_writeBackData; + alwt_allocateL3BlockOnWT; + we_wakeUpEvictionDependents; + dpf_deallocateProbeFilter; + dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(BP, ProbeAcksComplete, B){L3TagArrayWrite, L3TagArrayWrite} { + sf_setForwardReqTime; + c_sendResponseCtoD; + wd_writeBackData; + alwt_allocateL3BlockOnWT; + dt_deallocateTBE; + pt_popTriggerQueue; + } +} diff --git a/src/mem/protocol/MOESI_AMD_Base.slicc b/src/mem/protocol/MOESI_AMD_Base.slicc new file mode 100644 index 000000000..b38145246 --- /dev/null +++ b/src/mem/protocol/MOESI_AMD_Base.slicc @@ -0,0 +1,6 @@ +protocol "MOESI_AMD_Base"; +include "RubySlicc_interfaces.slicc"; +include "MOESI_AMD_Base-msg.sm"; +include "MOESI_AMD_Base-CorePair.sm"; +include "MOESI_AMD_Base-L3cache.sm"; +include "MOESI_AMD_Base-dir.sm"; diff --git a/src/mem/protocol/RubySlicc_ComponentMapping.sm b/src/mem/protocol/RubySlicc_ComponentMapping.sm index a72492b42..e1d7c4399 100644 --- a/src/mem/protocol/RubySlicc_ComponentMapping.sm +++ b/src/mem/protocol/RubySlicc_ComponentMapping.sm @@ -37,7 +37,10 @@ MachineID mapAddressToRange(Addr addr, MachineType type, NetDest broadcast(MachineType type); MachineID map_Address_to_DMA(Addr addr); MachineID map_Address_to_Directory(Addr addr); +MachineID map_Address_to_RegionDir(Addr addr); NodeID map_Address_to_DirectoryNode(Addr addr); +MachineID map_Address_to_TCCdir(Addr addr); +NodeID map_Address_to_TCCdirNode(Addr addr); NodeID machineIDToNodeID(MachineID 
machID); NodeID machineIDToVersion(MachineID machID); MachineType machineIDToMachineType(MachineID machID); diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm index 5ee26d65c..c743ebe28 100644 --- a/src/mem/protocol/RubySlicc_Exports.sm +++ b/src/mem/protocol/RubySlicc_Exports.sm @@ -62,7 +62,7 @@ bool testAndWrite(Addr addr, DataBlock datablk, Packet *pkt); // AccessPermission // The following five states define the access permission of all memory blocks. -// These permissions have multiple uses. They coordinate locking and +// These permissions have multiple uses. They coordinate locking and // synchronization primitives, as well as enable functional accesses. // One should not need to add any additional permission values and it is very // risky to do so. @@ -73,7 +73,7 @@ enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent") Read_Write, desc="block is Read/Write"; // Possibly Invalid data - // The maybe stale permission indicates that accordingly to the protocol, + // The maybe stale permission indicates that accordingly to the protocol, // there is no guarantee the block contains valid data. However, functional // writes should update the block because a dataless PUT request may // revalidate the block's data. 
@@ -227,6 +227,13 @@ enumeration(MachineType, desc="...", default="MachineType_NULL") { Collector, desc="Collector Mach"; L1Cache_wCC, desc="L1 Cache Mach to track cache-to-cache transfer (used for miss latency profile)"; L2Cache_wCC, desc="L2 Cache Mach to track cache-to-cache transfer (used for miss latency profile)"; + CorePair, desc="Cache Mach (2 cores, Private L1Ds, Shared L1I & L2)"; + TCP, desc="GPU L1 Data Cache (Texture Cache per Pipe)"; + TCC, desc="GPU L2 Shared Cache (Texture Cache per Channel)"; + TCCdir, desc="Directory at the GPU L2 Cache (TCC)"; + SQC, desc="GPU L1 Instr Cache (Sequencer Cache)"; + RegionDir, desc="Region-granular directory"; + RegionBuffer,desc="Region buffer for CPU and GPU"; NULL, desc="null mach type"; } diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm index a6c57e1b0..b8d284725 100644 --- a/src/mem/protocol/RubySlicc_Types.sm +++ b/src/mem/protocol/RubySlicc_Types.sm @@ -31,8 +31,8 @@ // // **PLEASE NOTE!** When adding objects to this file you must also add a line -// in the src/mem/ruby/SConscript file. Otherwise the external object's .hh -// file will not be copied to the protocol directory and you will encounter a +// in the src/mem/ruby/SConscript file. Otherwise the external object's .hh +// file will not be copied to the protocol directory and you will encounter a // undefined declaration error. 
// @@ -95,6 +95,8 @@ structure (NetDest, external = "yes", non_obj="yes") { bool intersectionIsEmpty(Set); bool intersectionIsEmpty(NetDest); MachineID smallestElement(MachineType); + NetDest OR(NetDest); + NetDest AND(NetDest); } structure (Sequencer, external = "yes") { @@ -117,6 +119,44 @@ structure (Sequencer, external = "yes") { void invalidateSC(Addr); } +structure (GPUCoalescer, external = "yes") { + void readCallback(Addr, DataBlock); + void readCallback(Addr, MachineType, DataBlock); + void readCallback(Addr, MachineType, DataBlock, + Cycles, Cycles, Cycles); + void readCallback(Addr, MachineType, DataBlock, + Cycles, Cycles, Cycles, bool); + void writeCallback(Addr, DataBlock); + void writeCallback(Addr, MachineType, DataBlock); + void writeCallback(Addr, MachineType, DataBlock, + Cycles, Cycles, Cycles); + void writeCallback(Addr, MachineType, DataBlock, + Cycles, Cycles, Cycles, bool); + void checkCoherence(Addr); + void evictionCallback(Addr); + void recordCPReadCallBack(MachineID, MachineID); + void recordCPWriteCallBack(MachineID, MachineID); +} + +structure (VIPERCoalescer, external = "yes") { + void readCallback(Addr, DataBlock); + void readCallback(Addr, MachineType, DataBlock); + void readCallback(Addr, MachineType, DataBlock, + Cycles, Cycles, Cycles); + void readCallback(Addr, MachineType, DataBlock, + Cycles, Cycles, Cycles, bool); + void writeCallback(Addr, DataBlock); + void writeCallback(Addr, MachineType, DataBlock); + void writeCallback(Addr, MachineType, DataBlock, + Cycles, Cycles, Cycles); + void writeCallback(Addr, MachineType, DataBlock, + Cycles, Cycles, Cycles, bool); + void invCallback(Addr); + void wbCallback(Addr); + void checkCoherence(Addr); + void evictionCallback(Addr); +} + structure(RubyRequest, desc="...", interface="Message", external="yes") { Addr LineAddress, desc="Line address for this request"; Addr PhysicalAddress, desc="Physical address for this request"; @@ -161,6 +201,7 @@ structure (CacheMemory, external = 
"yes") { Cycles getTagLatency(); Cycles getDataLatency(); void setMRU(Addr); + void setMRU(Addr, int); void setMRU(AbstractCacheEntry); void recordRequestType(CacheRequestType, Addr); bool checkResourceAvailable(CacheResourceType, Addr); diff --git a/src/mem/protocol/SConsopts b/src/mem/protocol/SConsopts index ca432a73e..47b36e276 100644 --- a/src/mem/protocol/SConsopts +++ b/src/mem/protocol/SConsopts @@ -33,6 +33,11 @@ import os Import('*') all_protocols.extend([ + 'GPU_VIPER', + 'GPU_VIPER_Baseline', + 'GPU_VIPER_Region', + 'GPU_RfO', + 'MOESI_AMD_Base', 'MESI_Two_Level', 'MESI_Three_Level', 'MI_example', diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript index 16e932432..82a16c9b0 100644 --- a/src/mem/ruby/SConscript +++ b/src/mem/ruby/SConscript @@ -124,13 +124,20 @@ MakeInclude('common/Set.hh') MakeInclude('common/WriteMask.hh') MakeInclude('filters/AbstractBloomFilter.hh') MakeInclude('network/MessageBuffer.hh') -MakeInclude('structures/Prefetcher.hh') MakeInclude('structures/CacheMemory.hh') -MakeInclude('system/DMASequencer.hh') MakeInclude('structures/DirectoryMemory.hh') -MakeInclude('structures/WireBuffer.hh') MakeInclude('structures/PerfectCacheMemory.hh') MakeInclude('structures/PersistentTable.hh') -MakeInclude('system/Sequencer.hh') +MakeInclude('structures/Prefetcher.hh') MakeInclude('structures/TBETable.hh') MakeInclude('structures/TimerTable.hh') +MakeInclude('structures/WireBuffer.hh') +MakeInclude('system/DMASequencer.hh') +MakeInclude('system/Sequencer.hh') + +# External types : Group "mem/protocol" : include "header.hh" to the bottom +# of this MakeIncludes if it is referenced as +# <# include "mem/protocol/header.hh"> in any file +# generated_dir = Dir('../protocol') +MakeInclude('system/GPUCoalescer.hh') +MakeInclude('system/VIPERCoalescer.hh') diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index b3b37e5a6..7d3f20982 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ 
b/src/mem/ruby/profiler/Profiler.cc @@ -269,7 +269,7 @@ Profiler::collateStats() it != m_ruby_system->m_abstract_controls[i].end(); ++it) { AbstractController *ctr = (*it).second; - Sequencer *seq = ctr->getSequencer(); + Sequencer *seq = ctr->getCPUSequencer(); if (seq != NULL) { m_outstandReqHist.add(seq->getOutstandReqHist()); } @@ -282,7 +282,7 @@ Profiler::collateStats() it != m_ruby_system->m_abstract_controls[i].end(); ++it) { AbstractController *ctr = (*it).second; - Sequencer *seq = ctr->getSequencer(); + Sequencer *seq = ctr->getCPUSequencer(); if (seq != NULL) { // add all the latencies m_latencyHist.add(seq->getLatencyHist()); diff --git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh index 926556781..cbd068c04 100644 --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh @@ -56,6 +56,12 @@ class AbstractCacheEntry : public AbstractEntry virtual DataBlock& getDataBlk() { panic("getDataBlk() not implemented!"); } + int validBlocks; + virtual int& getNumValidBlocks() + { + return validBlocks; + } + // Functions for locking and unlocking the cache entry. These are required // for supporting atomic memory accesses. 
void setLocked(int context); diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 93fe50c88..458fde5bc 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -200,6 +200,12 @@ AbstractController::unblock(Addr addr) } } +bool +AbstractController::isBlocked(Addr addr) +{ + return (m_block_map.count(addr) > 0); +} + BaseMasterPort & AbstractController::getMasterPort(const std::string &if_name, PortID idx) diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index 383507eed..4488ee3f4 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -73,6 +73,7 @@ class AbstractController : public MemObject, public Consumer // return instance name void blockOnQueue(Addr, MessageBuffer*); void unblock(Addr); + bool isBlocked(Addr); virtual MessageBuffer* getMandatoryQueue() const = 0; virtual MessageBuffer* getMemoryQueue() const = 0; @@ -84,7 +85,7 @@ class AbstractController : public MemObject, public Consumer virtual void regStats(); virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0; - virtual Sequencer* getSequencer() const = 0; + virtual Sequencer* getCPUSequencer() const = 0; //! These functions are used by ruby system to read/write the data blocks //! that exist with in the controller. 
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh index 46071335e..cdedc2e14 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh @@ -43,6 +43,12 @@ map_Address_to_DirectoryNode(Addr addr) return DirectoryMemory::mapAddressToDirectoryVersion(addr); } +inline NodeID +map_Address_to_TCCdirNode(Addr addr) +{ + return DirectoryMemory::mapAddressToDirectoryVersion(addr); +} + // used to determine the home directory // returns a value between 0 and total_directories_within_the_system inline MachineID @@ -53,6 +59,22 @@ map_Address_to_Directory(Addr addr) return mach; } +inline MachineID +map_Address_to_RegionDir(Addr addr) +{ + MachineID mach = {MachineType_RegionDir, + map_Address_to_DirectoryNode(addr)}; + return mach; +} + +inline MachineID +map_Address_to_TCCdir(Addr addr) +{ + MachineID mach = + {MachineType_TCCdir, map_Address_to_TCCdirNode(addr)}; + return mach; +} + inline NetDest broadcast(MachineType type) { @@ -102,4 +124,11 @@ createMachineID(MachineType type, NodeID id) return mach; } +inline MachineID +MachineTypeAndNodeIDToMachineID(MachineType type, NodeID node) +{ + MachineID mach = {type, node}; + return mach; +} + #endif // __MEM_RUBY_SLICC_INTERFACE_COMPONENTMAPPINGS_HH__ diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index a8a3ba949..45fb85d05 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -35,6 +35,7 @@ #include "mem/protocol/AccessPermission.hh" #include "mem/ruby/structures/CacheMemory.hh" #include "mem/ruby/system/RubySystem.hh" +#include "mem/ruby/system/WeightedLRUPolicy.hh" using namespace std; @@ -66,29 +67,27 @@ CacheMemory::CacheMemory(const Params *p) m_start_index_bit = p->start_index_bit; m_is_instruction_only_cache = p->is_icache; m_resource_stalls = p->resourceStalls; + 
m_block_size = p->block_size; // may be 0 at this point. Updated in init() } void CacheMemory::init() { - m_cache_num_sets = (m_cache_size / m_cache_assoc) / - RubySystem::getBlockSizeBytes(); + if (m_block_size == 0) { + m_block_size = RubySystem::getBlockSizeBytes(); + } + m_cache_num_sets = (m_cache_size / m_cache_assoc) / m_block_size; assert(m_cache_num_sets > 1); m_cache_num_set_bits = floorLog2(m_cache_num_sets); assert(m_cache_num_set_bits > 0); - m_cache.resize(m_cache_num_sets); - for (int i = 0; i < m_cache_num_sets; i++) { - m_cache[i].resize(m_cache_assoc); - for (int j = 0; j < m_cache_assoc; j++) { - m_cache[i][j] = NULL; - } - } + m_cache.resize(m_cache_num_sets, + std::vector<AbstractCacheEntry*>(m_cache_assoc, nullptr)); } CacheMemory::~CacheMemory() { - if (m_replacementPolicy_ptr != NULL) + if (m_replacementPolicy_ptr) delete m_replacementPolicy_ptr; for (int i = 0; i < m_cache_num_sets; i++) { for (int j = 0; j < m_cache_assoc; j++) { @@ -359,6 +358,37 @@ CacheMemory::setMRU(const AbstractCacheEntry *e) } void +CacheMemory::setMRU(Addr address, int occupancy) +{ + int64_t cacheSet = addressToCacheSet(address); + int loc = findTagInSet(cacheSet, address); + + if(loc != -1) { + if (m_replacementPolicy_ptr->useOccupancy()) { + (static_cast<WeightedLRUPolicy*>(m_replacementPolicy_ptr))-> + touch(cacheSet, loc, curTick(), occupancy); + } else { + m_replacementPolicy_ptr-> + touch(cacheSet, loc, curTick()); + } + } +} + +int +CacheMemory::getReplacementWeight(int64_t set, int64_t loc) +{ + assert(set < m_cache_num_sets); + assert(loc < m_cache_assoc); + int ret = 0; + if(m_cache[set][loc] != NULL) { + ret = m_cache[set][loc]->getNumValidBlocks(); + assert(ret >= 0); + } + + return ret; +} + +void CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const { uint64_t warmedUpBlocks = 0; diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh index 72805b32b..5b30505d3 100644 --- 
a/src/mem/ruby/structures/CacheMemory.hh +++ b/src/mem/ruby/structures/CacheMemory.hh @@ -106,7 +106,8 @@ class CacheMemory : public SimObject // Set this address to most recently used void setMRU(Addr address); - // Set this entry to most recently used + void setMRU(Addr addr, int occupancy); + int getReplacementWeight(int64_t set, int64_t loc); void setMRU(const AbstractCacheEntry *e); // Functions for locking and unlocking cache lines corresponding to the @@ -146,6 +147,7 @@ class CacheMemory : public SimObject Stats::Scalar numDataArrayStalls; int getCacheSize() const { return m_cache_size; } + int getCacheAssoc() const { return m_cache_assoc; } int getNumBlocks() const { return m_cache_num_sets * m_cache_assoc; } Addr getAddressAtIdx(int idx) const; @@ -182,6 +184,7 @@ class CacheMemory : public SimObject int m_cache_assoc; int m_start_index_bit; bool m_resource_stalls; + int m_block_size; }; std::ostream& operator<<(std::ostream& out, const CacheMemory& obj); diff --git a/src/mem/ruby/structures/RubyCache.py b/src/mem/ruby/structures/RubyCache.py index 4eb87ac74..9fc4726b0 100644 --- a/src/mem/ruby/structures/RubyCache.py +++ b/src/mem/ruby/structures/RubyCache.py @@ -42,6 +42,7 @@ class RubyCache(SimObject): "") start_index_bit = Param.Int(6, "index start, default 6 for 64-byte line"); is_icache = Param.Bool(False, "is instruction only cache"); + block_size = Param.MemorySize("0B", "block size in bytes. 0 means default RubyBlockSize") dataArrayBanks = Param.Int(1, "Number of banks for the data array") tagArrayBanks = Param.Int(1, "Number of banks for the tag array") diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc new file mode 100644 index 000000000..db279bd3a --- /dev/null +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -0,0 +1,1397 @@ +/* + * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * Author: Sooraj Puthoor + */ + +#include "base/misc.hh" +#include "base/str.hh" +#include "config/the_isa.hh" + +#if THE_ISA == X86_ISA +#include "arch/x86/insts/microldstop.hh" + +#endif // X86_ISA +#include "mem/ruby/system/GPUCoalescer.hh" + +#include "cpu/testers/rubytest/RubyTester.hh" +#include "debug/GPUCoalescer.hh" +#include "debug/MemoryAccess.hh" +#include "debug/ProtocolTrace.hh" +#include "debug/RubyPort.hh" +#include "debug/RubyStats.hh" +#include "gpu-compute/shader.hh" +#include "mem/packet.hh" +#include "mem/ruby/common/DataBlock.hh" +#include "mem/ruby/common/SubBlock.hh" +#include "mem/ruby/network/MessageBuffer.hh" +#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/slicc_interface/AbstractController.hh" +#include "mem/ruby/slicc_interface/RubyRequest.hh" +#include "mem/ruby/structures/CacheMemory.hh" +#include "mem/ruby/system/RubySystem.hh" +#include "params/RubyGPUCoalescer.hh" + +using namespace std; + +GPUCoalescer * +RubyGPUCoalescerParams::create() +{ + return new GPUCoalescer(this); +} + +HSAScope +reqScopeToHSAScope(Request* req) +{ + HSAScope accessScope = HSAScope_UNSPECIFIED; + if (req->isScoped()) { + if (req->isWavefrontScope()) { + accessScope = HSAScope_WAVEFRONT; + } else if (req->isWorkgroupScope()) { + accessScope = HSAScope_WORKGROUP; + } else if (req->isDeviceScope()) { + accessScope = HSAScope_DEVICE; + } else if (req->isSystemScope()) { + accessScope = HSAScope_SYSTEM; + } else { + fatal("Bad scope type"); + } + } + return accessScope; +} + +HSASegment +reqSegmentToHSASegment(Request* req) +{ + HSASegment accessSegment = HSASegment_GLOBAL; + + if (req->isGlobalSegment()) { + accessSegment = HSASegment_GLOBAL; + } else if (req->isGroupSegment()) { + accessSegment = HSASegment_GROUP; + } else if (req->isPrivateSegment()) { + accessSegment = HSASegment_PRIVATE; + } else if (req->isKernargSegment()) { + accessSegment = HSASegment_KERNARG; + } else if (req->isReadonlySegment()) { + accessSegment = 
HSASegment_READONLY; + } else if (req->isSpillSegment()) { + accessSegment = HSASegment_SPILL; + } else if (req->isArgSegment()) { + accessSegment = HSASegment_ARG; + } else { + fatal("Bad segment type"); + } + + return accessSegment; +} + +GPUCoalescer::GPUCoalescer(const Params *p) + : RubyPort(p), issueEvent(this), deadlockCheckEvent(this) +{ + m_store_waiting_on_load_cycles = 0; + m_store_waiting_on_store_cycles = 0; + m_load_waiting_on_store_cycles = 0; + m_load_waiting_on_load_cycles = 0; + + m_outstanding_count = 0; + + m_max_outstanding_requests = 0; + m_deadlock_threshold = 0; + m_instCache_ptr = nullptr; + m_dataCache_ptr = nullptr; + + m_instCache_ptr = p->icache; + m_dataCache_ptr = p->dcache; + m_max_outstanding_requests = p->max_outstanding_requests; + m_deadlock_threshold = p->deadlock_threshold; + + assert(m_max_outstanding_requests > 0); + assert(m_deadlock_threshold > 0); + assert(m_instCache_ptr); + assert(m_dataCache_ptr); + + m_data_cache_hit_latency = p->dcache_hit_latency; + + m_usingNetworkTester = p->using_network_tester; + assumingRfOCoherence = p->assume_rfo; +} + +GPUCoalescer::~GPUCoalescer() +{ +} + +void +GPUCoalescer::wakeup() +{ + // Check for deadlock of any of the requests + Cycles current_time = curCycle(); + + // Check across all outstanding requests + int total_outstanding = 0; + + RequestTable::iterator read = m_readRequestTable.begin(); + RequestTable::iterator read_end = m_readRequestTable.end(); + for (; read != read_end; ++read) { + GPUCoalescerRequest* request = read->second; + if (current_time - request->issue_time < m_deadlock_threshold) + continue; + + panic("Possible Deadlock detected. 
Aborting!\n" + "version: %d request.paddr: 0x%x m_readRequestTable: %d " + "current time: %u issue_time: %d difference: %d\n", m_version, + request->pkt->getAddr(), m_readRequestTable.size(), + current_time * clockPeriod(), request->issue_time * clockPeriod(), + (current_time - request->issue_time)*clockPeriod()); + } + + RequestTable::iterator write = m_writeRequestTable.begin(); + RequestTable::iterator write_end = m_writeRequestTable.end(); + for (; write != write_end; ++write) { + GPUCoalescerRequest* request = write->second; + if (current_time - request->issue_time < m_deadlock_threshold) + continue; + + panic("Possible Deadlock detected. Aborting!\n" + "version: %d request.paddr: 0x%x m_writeRequestTable: %d " + "current time: %u issue_time: %d difference: %d\n", m_version, + request->pkt->getAddr(), m_writeRequestTable.size(), + current_time * clockPeriod(), request->issue_time * clockPeriod(), + (current_time - request->issue_time) * clockPeriod()); + } + + total_outstanding += m_writeRequestTable.size(); + total_outstanding += m_readRequestTable.size(); + + assert(m_outstanding_count == total_outstanding); + + if (m_outstanding_count > 0) { + // If there are still outstanding requests, keep checking + schedule(deadlockCheckEvent, + m_deadlock_threshold * clockPeriod() + + curTick()); + } +} + +void +GPUCoalescer::resetStats() +{ + m_latencyHist.reset(); + m_missLatencyHist.reset(); + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_typeLatencyHist[i]->reset(); + m_missTypeLatencyHist[i]->reset(); + for (int j = 0; j < MachineType_NUM; j++) { + m_missTypeMachLatencyHist[i][j]->reset(); + } + } + + for (int i = 0; i < MachineType_NUM; i++) { + m_missMachLatencyHist[i]->reset(); + + m_IssueToInitialDelayHist[i]->reset(); + m_InitialToForwardDelayHist[i]->reset(); + m_ForwardToFirstResponseDelayHist[i]->reset(); + m_FirstResponseToCompletionDelayHist[i]->reset(); + } +} + +void +GPUCoalescer::printProgress(ostream& out) const +{ +} + +RequestStatus 
+GPUCoalescer::getRequestStatus(PacketPtr pkt, RubyRequestType request_type) +{ + Addr line_addr = makeLineAddress(pkt->getAddr()); + + if (!m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge())) { + return RequestStatus_BufferFull; + } + + if(m_controller->isBlocked(line_addr) && + request_type != RubyRequestType_Locked_RMW_Write) { + return RequestStatus_Aliased; + } + + if ((request_type == RubyRequestType_ST) || + (request_type == RubyRequestType_ATOMIC) || + (request_type == RubyRequestType_ATOMIC_RETURN) || + (request_type == RubyRequestType_ATOMIC_NO_RETURN) || + (request_type == RubyRequestType_RMW_Read) || + (request_type == RubyRequestType_RMW_Write) || + (request_type == RubyRequestType_Load_Linked) || + (request_type == RubyRequestType_Store_Conditional) || + (request_type == RubyRequestType_Locked_RMW_Read) || + (request_type == RubyRequestType_Locked_RMW_Write) || + (request_type == RubyRequestType_FLUSH)) { + + // Check if there is any outstanding read request for the same + // cache line. + if (m_readRequestTable.count(line_addr) > 0) { + m_store_waiting_on_load_cycles++; + return RequestStatus_Aliased; + } + + if (m_writeRequestTable.count(line_addr) > 0) { + // There is an outstanding write request for the cache line + m_store_waiting_on_store_cycles++; + return RequestStatus_Aliased; + } + } else { + // Check if there is any outstanding write request for the same + // cache line. 
+ if (m_writeRequestTable.count(line_addr) > 0) { + m_load_waiting_on_store_cycles++; + return RequestStatus_Aliased; + } + + if (m_readRequestTable.count(line_addr) > 0) { + // There is an outstanding read request for the cache line + m_load_waiting_on_load_cycles++; + return RequestStatus_Aliased; + } + } + + return RequestStatus_Ready; + +} + + + +// sets the kernelEndList +void +GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt) +{ + // Don't know if this will happen or is possible + // but I just want to be careful and not have it become + // simulator hang in the future + DPRINTF(GPUCoalescer, "inserting wf: %d to kernelEndlist\n", wavefront_id); + assert(kernelEndList.count(wavefront_id) == 0); + + kernelEndList[wavefront_id] = pkt; + DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n", + kernelEndList.size()); +} + + +// Insert the request on the correct request table. Return true if +// the entry was already present. +bool +GPUCoalescer::insertRequest(PacketPtr pkt, RubyRequestType request_type) +{ + assert(getRequestStatus(pkt, request_type) == RequestStatus_Ready || + pkt->req->isLockedRMW() || + !m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge())); + + int total_outstanding M5_VAR_USED = + m_writeRequestTable.size() + m_readRequestTable.size(); + + assert(m_outstanding_count == total_outstanding); + + // See if we should schedule a deadlock check + if (deadlockCheckEvent.scheduled() == false) { + schedule(deadlockCheckEvent, m_deadlock_threshold + curTick()); + } + + Addr line_addr = makeLineAddress(pkt->getAddr()); + if ((request_type == RubyRequestType_ST) || + (request_type == RubyRequestType_ATOMIC) || + (request_type == RubyRequestType_ATOMIC_RETURN) || + (request_type == RubyRequestType_ATOMIC_NO_RETURN) || + (request_type == RubyRequestType_RMW_Read) || + (request_type == RubyRequestType_RMW_Write) || + (request_type == RubyRequestType_Load_Linked) || + (request_type == RubyRequestType_Store_Conditional) || + (request_type == 
RubyRequestType_Locked_RMW_Read) || + (request_type == RubyRequestType_Locked_RMW_Write) || + (request_type == RubyRequestType_FLUSH)) { + + pair<RequestTable::iterator, bool> r = + m_writeRequestTable.insert(RequestTable::value_type(line_addr, + (GPUCoalescerRequest*) NULL)); + if (r.second) { + RequestTable::iterator i = r.first; + i->second = new GPUCoalescerRequest(pkt, request_type, + curCycle()); + DPRINTF(GPUCoalescer, + "Inserting write request for paddr %#x for type %d\n", + pkt->req->getPaddr(), i->second->m_type); + m_outstanding_count++; + } else { + return true; + } + } else { + pair<RequestTable::iterator, bool> r = + m_readRequestTable.insert(RequestTable::value_type(line_addr, + (GPUCoalescerRequest*) NULL)); + + if (r.second) { + RequestTable::iterator i = r.first; + i->second = new GPUCoalescerRequest(pkt, request_type, + curCycle()); + DPRINTF(GPUCoalescer, + "Inserting read request for paddr %#x for type %d\n", + pkt->req->getPaddr(), i->second->m_type); + m_outstanding_count++; + } else { + return true; + } + } + + m_outstandReqHist.sample(m_outstanding_count); + + total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size(); + assert(m_outstanding_count == total_outstanding); + + return false; +} + +void +GPUCoalescer::markRemoved() +{ + m_outstanding_count--; + assert(m_outstanding_count == + m_writeRequestTable.size() + m_readRequestTable.size()); +} + +void +GPUCoalescer::removeRequest(GPUCoalescerRequest* srequest) +{ + assert(m_outstanding_count == + m_writeRequestTable.size() + m_readRequestTable.size()); + + Addr line_addr = makeLineAddress(srequest->pkt->getAddr()); + if ((srequest->m_type == RubyRequestType_ST) || + (srequest->m_type == RubyRequestType_RMW_Read) || + (srequest->m_type == RubyRequestType_RMW_Write) || + (srequest->m_type == RubyRequestType_Load_Linked) || + (srequest->m_type == RubyRequestType_Store_Conditional) || + (srequest->m_type == RubyRequestType_Locked_RMW_Read) || + (srequest->m_type == 
RubyRequestType_Locked_RMW_Write)) { + m_writeRequestTable.erase(line_addr); + } else { + m_readRequestTable.erase(line_addr); + } + + markRemoved(); +} + +bool +GPUCoalescer::handleLlsc(Addr address, GPUCoalescerRequest* request) +{ + // + // The success flag indicates whether the LLSC operation was successful. + // LL ops will always succeed, but SC may fail if the cache line is no + // longer locked. + // + bool success = true; + if (request->m_type == RubyRequestType_Store_Conditional) { + if (!m_dataCache_ptr->isLocked(address, m_version)) { + // + // For failed SC requests, indicate the failure to the cpu by + // setting the extra data to zero. + // + request->pkt->req->setExtraData(0); + success = false; + } else { + // + // For successful SC requests, indicate the success to the cpu by + // setting the extra data to one. + // + request->pkt->req->setExtraData(1); + } + // + // Independent of success, all SC operations must clear the lock + // + m_dataCache_ptr->clearLocked(address); + } else if (request->m_type == RubyRequestType_Load_Linked) { + // + // Note: To fully follow Alpha LLSC semantics, should the LL clear any + // previously locked cache lines? 
+ // + m_dataCache_ptr->setLocked(address, m_version); + } else if ((m_dataCache_ptr->isTagPresent(address)) && + (m_dataCache_ptr->isLocked(address, m_version))) { + // + // Normal writes should clear the locked address + // + m_dataCache_ptr->clearLocked(address); + } + return success; +} + +void +GPUCoalescer::writeCallback(Addr address, DataBlock& data) +{ + writeCallback(address, MachineType_NULL, data); +} + +void +GPUCoalescer::writeCallback(Addr address, + MachineType mach, + DataBlock& data) +{ + writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0)); +} + +void +GPUCoalescer::writeCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime) +{ + writeCallback(address, mach, data, + initialRequestTime, forwardRequestTime, firstResponseTime, + false); +} + +void +GPUCoalescer::writeCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion) +{ + assert(address == makeLineAddress(address)); + + DPRINTF(GPUCoalescer, "write callback for address %#x\n", address); + assert(m_writeRequestTable.count(makeLineAddress(address))); + + RequestTable::iterator i = m_writeRequestTable.find(address); + assert(i != m_writeRequestTable.end()); + GPUCoalescerRequest* request = i->second; + + m_writeRequestTable.erase(i); + markRemoved(); + + assert((request->m_type == RubyRequestType_ST) || + (request->m_type == RubyRequestType_ATOMIC) || + (request->m_type == RubyRequestType_ATOMIC_RETURN) || + (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) || + (request->m_type == RubyRequestType_RMW_Read) || + (request->m_type == RubyRequestType_RMW_Write) || + (request->m_type == RubyRequestType_Load_Linked) || + (request->m_type == RubyRequestType_Store_Conditional) || + (request->m_type == RubyRequestType_Locked_RMW_Read) || + (request->m_type == 
RubyRequestType_Locked_RMW_Write) || + (request->m_type == RubyRequestType_FLUSH)); + + + // + // For Alpha, properly handle LL, SC, and write requests with respect to + // locked cache blocks. + // + // Not valid for Network_test protocl + // + bool success = true; + if(!m_usingNetworkTester) + success = handleLlsc(address, request); + + if (request->m_type == RubyRequestType_Locked_RMW_Read) { + m_controller->blockOnQueue(address, m_mandatory_q_ptr); + } else if (request->m_type == RubyRequestType_Locked_RMW_Write) { + m_controller->unblock(address); + } + + hitCallback(request, mach, data, success, + request->issue_time, forwardRequestTime, firstResponseTime, + isRegion); +} + +void +GPUCoalescer::readCallback(Addr address, DataBlock& data) +{ + readCallback(address, MachineType_NULL, data); +} + +void +GPUCoalescer::readCallback(Addr address, + MachineType mach, + DataBlock& data) +{ + readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0)); +} + +void +GPUCoalescer::readCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime) +{ + + readCallback(address, mach, data, + initialRequestTime, forwardRequestTime, firstResponseTime, + false); +} + +void +GPUCoalescer::readCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion) +{ + assert(address == makeLineAddress(address)); + assert(m_readRequestTable.count(makeLineAddress(address))); + + DPRINTF(GPUCoalescer, "read callback for address %#x\n", address); + RequestTable::iterator i = m_readRequestTable.find(address); + assert(i != m_readRequestTable.end()); + GPUCoalescerRequest* request = i->second; + + m_readRequestTable.erase(i); + markRemoved(); + + assert((request->m_type == RubyRequestType_LD) || + (request->m_type == RubyRequestType_IFETCH)); + + hitCallback(request, mach, data, true, + 
request->issue_time, forwardRequestTime, firstResponseTime, + isRegion); +} + +void +GPUCoalescer::hitCallback(GPUCoalescerRequest* srequest, + MachineType mach, + DataBlock& data, + bool success, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion) +{ + PacketPtr pkt = srequest->pkt; + Addr request_address = pkt->getAddr(); + Addr request_line_address = makeLineAddress(request_address); + + RubyRequestType type = srequest->m_type; + + // Set this cache entry to the most recently used + if (type == RubyRequestType_IFETCH) { + if (m_instCache_ptr->isTagPresent(request_line_address)) + m_instCache_ptr->setMRU(request_line_address); + } else { + if (m_dataCache_ptr->isTagPresent(request_line_address)) + m_dataCache_ptr->setMRU(request_line_address); + } + + recordMissLatency(srequest, mach, + initialRequestTime, + forwardRequestTime, + firstResponseTime, + success, isRegion); + // update the data + // + // MUST AD DOING THIS FOR EACH REQUEST IN COALESCER + int len = reqCoalescer[request_line_address].size(); + std::vector<PacketPtr> mylist; + for (int i = 0; i < len; ++i) { + PacketPtr pkt = reqCoalescer[request_line_address][i].first; + assert(type == + reqCoalescer[request_line_address][i].second[PrimaryType]); + request_address = pkt->getAddr(); + request_line_address = makeLineAddress(pkt->getAddr()); + if (pkt->getPtr<uint8_t>()) { + if ((type == RubyRequestType_LD) || + (type == RubyRequestType_ATOMIC) || + (type == RubyRequestType_ATOMIC_RETURN) || + (type == RubyRequestType_IFETCH) || + (type == RubyRequestType_RMW_Read) || + (type == RubyRequestType_Locked_RMW_Read) || + (type == RubyRequestType_Load_Linked)) { + memcpy(pkt->getPtr<uint8_t>(), + data.getData(getOffset(request_address), + pkt->getSize()), + pkt->getSize()); + } else { + data.setData(pkt->getPtr<uint8_t>(), + getOffset(request_address), pkt->getSize()); + } + } else { + DPRINTF(MemoryAccess, + "WARNING. 
Data not transfered from Ruby to M5 for type " \ + "%s\n", + RubyRequestType_to_string(type)); + } + + // If using the RubyTester, update the RubyTester sender state's + // subBlock with the recieved data. The tester will later access + // this state. + // Note: RubyPort will access it's sender state before the + // RubyTester. + if (m_usingRubyTester) { + RubyPort::SenderState *requestSenderState = + safe_cast<RubyPort::SenderState*>(pkt->senderState); + RubyTester::SenderState* testerSenderState = + safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor); + testerSenderState->subBlock.mergeFrom(data); + } + + mylist.push_back(pkt); + } + delete srequest; + reqCoalescer.erase(request_line_address); + assert(!reqCoalescer.count(request_line_address)); + + + + completeHitCallback(mylist, len); +} + +bool +GPUCoalescer::empty() const +{ + return m_writeRequestTable.empty() && m_readRequestTable.empty(); +} + +// Analyzes the packet to see if this request can be coalesced. +// If request can be coalesced, this request is added to the reqCoalescer table +// and makeRequest returns RequestStatus_Issued; +// If this is the first request to a cacheline, request is added to both +// newRequests queue and to the reqCoalescer table; makeRequest +// returns RequestStatus_Issued. +// If there is a pending request to this cacheline and this request +// can't be coalesced, RequestStatus_Aliased is returned and +// the packet needs to be reissued. 
+RequestStatus +GPUCoalescer::makeRequest(PacketPtr pkt) +{ + // Check for GPU Barrier Kernel End or Kernel Begin + // Leave these to be handled by the child class + // Kernel End/Barrier = isFlush + isRelease + // Kernel Begin = isFlush + isAcquire + if (pkt->req->isKernel()) { + if (pkt->req->isAcquire()){ + // This is a Kernel Begin leave handling to + // virtual xCoalescer::makeRequest + return RequestStatus_Issued; + }else if(pkt->req->isRelease()) { + // This is a Kernel End leave handling to + // virtual xCoalescer::makeRequest + // If we are here then we didn't call + // a virtual version of this function + // so we will also schedule the callback + int wf_id = 0; + if (pkt->req->hasContextId()) { + wf_id = pkt->req->contextId(); + } + insertKernel(wf_id, pkt); + newKernelEnds.push_back(wf_id); + if (!issueEvent.scheduled()) { + schedule(issueEvent, curTick()); + } + return RequestStatus_Issued; + } + } + + // If number of outstanding requests greater than the max allowed, + // return RequestStatus_BufferFull. This logic can be extended to + // support proper backpressure. + if (m_outstanding_count >= m_max_outstanding_requests) { + return RequestStatus_BufferFull; + } + + RubyRequestType primary_type = RubyRequestType_NULL; + RubyRequestType secondary_type = RubyRequestType_NULL; + + if (pkt->isLLSC()) { + // + // Alpha LL/SC instructions need to be handled carefully by the cache + // coherence protocol to ensure they follow the proper semantics. In + // particular, by identifying the operations as atomic, the protocol + // should understand that migratory sharing optimizations should not + // be performed (i.e. a load between the LL and SC should not steal + // away exclusive permission). 
+ // + if (pkt->isWrite()) { + primary_type = RubyRequestType_Store_Conditional; + } else { + assert(pkt->isRead()); + primary_type = RubyRequestType_Load_Linked; + } + secondary_type = RubyRequestType_ATOMIC; + } else if (pkt->req->isLockedRMW()) { + // + // x86 locked instructions are translated to store cache coherence + // requests because these requests should always be treated as read + // exclusive operations and should leverage any migratory sharing + // optimization built into the protocol. + // + if (pkt->isWrite()) { + primary_type = RubyRequestType_Locked_RMW_Write; + } else { + assert(pkt->isRead()); + primary_type = RubyRequestType_Locked_RMW_Read; + } + secondary_type = RubyRequestType_ST; + } else if (pkt->isAtomicOp()) { + // + // GPU Atomic Operation + // + primary_type = RubyRequestType_ATOMIC; + secondary_type = RubyRequestType_ATOMIC; + } else { + if (pkt->isRead()) { + if (pkt->req->isInstFetch()) { + primary_type = secondary_type = RubyRequestType_IFETCH; + } else { +#if THE_ISA == X86_ISA + uint32_t flags = pkt->req->getFlags(); + bool storeCheck = flags & + (TheISA::StoreCheck << TheISA::FlagShift); +#else + bool storeCheck = false; +#endif // X86_ISA + if (storeCheck) { + primary_type = RubyRequestType_RMW_Read; + secondary_type = RubyRequestType_ST; + } else { + primary_type = secondary_type = RubyRequestType_LD; + } + } + } else if (pkt->isWrite()) { + // + // Note: M5 packets do not differentiate ST from RMW_Write + // + primary_type = secondary_type = RubyRequestType_ST; + } else if (pkt->isFlush()) { + primary_type = secondary_type = RubyRequestType_FLUSH; + } else if (pkt->req->isRelease() || pkt->req->isAcquire()) { + if (assumingRfOCoherence) { + // If we reached here, this request must be a memFence + // and the protocol implements RfO, the coalescer can + // assume sequentially consistency and schedule the callback + // immediately. 
+ // Currently the code implements fence callbacks + // by reusing the mechanism for kernel completions. + // This should be fixed. + int wf_id = 0; + if (pkt->req->hasContextId()) { + wf_id = pkt->req->contextId(); + } + insertKernel(wf_id, pkt); + newKernelEnds.push_back(wf_id); + if (!issueEvent.scheduled()) { + schedule(issueEvent, curTick()); + } + return RequestStatus_Issued; + } else { + // If not RfO, return issued here and let the child coalescer + // take care of it. + return RequestStatus_Issued; + } + } else { + panic("Unsupported ruby packet type\n"); + } + } + + // Check if there is any pending request to this cache line from + // previous cycles. + // If there is a pending request, return aliased. Since coalescing + // across time is not permitted, aliased requests are not coalesced. + // If a request for this address has already been issued, we must block + RequestStatus status = getRequestStatus(pkt, primary_type); + if (status != RequestStatus_Ready) + return status; + + Addr line_addr = makeLineAddress(pkt->getAddr()); + + // Check if this request can be coalesced with previous + // requests from this cycle. + if (!reqCoalescer.count(line_addr)) { + // This is the first access to this cache line. + // A new request to the memory subsystem has to be + // made in the next cycle for this cache line, so + // add this line addr to the "newRequests" queue + newRequests.push_back(line_addr); + + // There was a request to this cache line in this cycle, + // let us see if we can coalesce this request with the previous + // requests from this cycle + } else if (primary_type != + reqCoalescer[line_addr][0].second[PrimaryType]) { + // can't coalesce loads, stores and atomics! + return RequestStatus_Aliased; + } else if (pkt->req->isLockedRMW() || + reqCoalescer[line_addr][0].first->req->isLockedRMW()) { + // can't coalesce locked accesses, but can coalesce atomics! 
+ return RequestStatus_Aliased; + } else if (pkt->req->hasContextId() && pkt->req->isRelease() && + pkt->req->contextId() != + reqCoalescer[line_addr][0].first->req->contextId()) { + // can't coalesce releases from different wavefronts + return RequestStatus_Aliased; + } + + // in addition to the packet, we need to save both request types + reqCoalescer[line_addr].push_back( + RequestDesc(pkt, std::vector<RubyRequestType>()) ); + reqCoalescer[line_addr].back().second.push_back(primary_type); + reqCoalescer[line_addr].back().second.push_back(secondary_type); + if (!issueEvent.scheduled()) + schedule(issueEvent, curTick()); + // TODO: issue hardware prefetches here + return RequestStatus_Issued; +} + +void +GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) +{ + + int proc_id = -1; + if (pkt != NULL && pkt->req->hasContextId()) { + proc_id = pkt->req->contextId(); + } + + // If valid, copy the pc to the ruby request + Addr pc = 0; + if (pkt->req->hasPC()) { + pc = pkt->req->getPC(); + } + + // At the moment setting scopes only counts + // for GPU spill space accesses + // which is pkt->req->isStack() + // this scope is REPLACE since it + // does not need to be flushed at the end + // of a kernel Private and local may need + // to be visible at the end of the kernel + HSASegment accessSegment = reqSegmentToHSASegment(pkt->req); + HSAScope accessScope = reqScopeToHSAScope(pkt->req); + + Addr line_addr = makeLineAddress(pkt->getAddr()); + + // Creating WriteMask that records written bytes + // and atomic operations. 
This enables partial writes + // and partial reads of those writes + DataBlock dataBlock; + dataBlock.clear(); + uint32_t blockSize = RubySystem::getBlockSizeBytes(); + std::vector<bool> accessMask(blockSize,false); + std::vector< std::pair<int,AtomicOpFunctor*> > atomicOps; + uint32_t tableSize = reqCoalescer[line_addr].size(); + for (int i = 0; i < tableSize; i++) { + PacketPtr tmpPkt = reqCoalescer[line_addr][i].first; + uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr; + uint32_t tmpSize = tmpPkt->getSize(); + if (tmpPkt->isAtomicOp()) { + std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset, + tmpPkt->getAtomicOp()); + atomicOps.push_back(tmpAtomicOp); + } else if(tmpPkt->isWrite()) { + dataBlock.setData(tmpPkt->getPtr<uint8_t>(), + tmpOffset, tmpSize); + } + for (int j = 0; j < tmpSize; j++) { + accessMask[tmpOffset + j] = true; + } + } + std::shared_ptr<RubyRequest> msg; + if (pkt->isAtomicOp()) { + msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(), + pkt->getPtr<uint8_t>(), + pkt->getSize(), pc, secondary_type, + RubyAccessMode_Supervisor, pkt, + PrefetchBit_No, proc_id, 100, + blockSize, accessMask, + dataBlock, atomicOps, + accessScope, accessSegment); + } else { + msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(), + pkt->getPtr<uint8_t>(), + pkt->getSize(), pc, secondary_type, + RubyAccessMode_Supervisor, pkt, + PrefetchBit_No, proc_id, 100, + blockSize, accessMask, + dataBlock, + accessScope, accessSegment); + } + DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n", + curTick(), m_version, "Coal", "Begin", "", "", + printAddress(msg->getPhysicalAddress()), + RubyRequestType_to_string(secondary_type)); + + fatal_if(secondary_type == RubyRequestType_IFETCH, + "there should not be any I-Fetch requests in the GPU Coalescer"); + + // Send the message to the cache controller + fatal_if(m_data_cache_hit_latency == 0, + "should not have a latency of zero"); + + assert(m_mandatory_q_ptr); + 
m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); +} + +template <class KEY, class VALUE> +std::ostream & +operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map) +{ + out << "["; + for (auto i = map.begin(); i != map.end(); ++i) + out << " " << i->first << "=" << i->second; + out << " ]"; + + return out; +} + +void +GPUCoalescer::print(ostream& out) const +{ + out << "[GPUCoalescer: " << m_version + << ", outstanding requests: " << m_outstanding_count + << ", read request table: " << m_readRequestTable + << ", write request table: " << m_writeRequestTable + << "]"; +} + +// this can be called from setState whenever coherence permissions are +// upgraded when invoked, coherence violations will be checked for the +// given block +void +GPUCoalescer::checkCoherence(Addr addr) +{ +#ifdef CHECK_COHERENCE + m_ruby_system->checkGlobalCoherenceInvariant(addr); +#endif +} + +void +GPUCoalescer::recordRequestType(SequencerRequestType requestType) { + DPRINTF(RubyStats, "Recorded statistic: %s\n", + SequencerRequestType_to_string(requestType)); +} + +GPUCoalescer::IssueEvent::IssueEvent(GPUCoalescer* _seq) + : Event(Progress_Event_Pri), seq(_seq) +{ +} + + +void +GPUCoalescer::completeIssue() +{ + // newRequests has the cacheline addresses of all the + // requests which need to be issued to the memory subsystem + // in this cycle + int len = newRequests.size(); + DPRINTF(GPUCoalescer, "Completing issue for %d new requests.\n", len); + for (int i = 0; i < len; ++i) { + // Get the requests from reqCoalescer table. Get only the + // first request for each cacheline, the remaining requests + // can be coalesced with the first request. So, only + // one request is issued per cacheline. + RequestDesc info = reqCoalescer[newRequests[i]][0]; + PacketPtr pkt = info.first; + DPRINTF(GPUCoalescer, "Completing for newReq %d: paddr %#x\n", + i, pkt->req->getPaddr()); + // Insert this request to the read/writeRequestTables. 
These tables + // are used to track aliased requests in makeRequest subroutine + bool found = insertRequest(pkt, info.second[PrimaryType]); + + if (found) { + panic("GPUCoalescer::makeRequest should never be called if the " + "request is already outstanding\n"); + } + + // Issue request to ruby subsystem + issueRequest(pkt, info.second[SecondaryType]); + } + newRequests.clear(); + + // have Kernel End releases been issued this cycle + len = newKernelEnds.size(); + for (int i = 0; i < len; i++) { + kernelCallback(newKernelEnds[i]); + } + newKernelEnds.clear(); +} + +void +GPUCoalescer::IssueEvent::process() +{ + seq->completeIssue(); +} + +const char * +GPUCoalescer::IssueEvent::description() const +{ + return "Issue coalesced request"; +} + +void +GPUCoalescer::evictionCallback(Addr address) +{ + ruby_eviction_callback(address); +} + +void +GPUCoalescer::kernelCallback(int wavefront_id) +{ + assert(kernelEndList.count(wavefront_id)); + + ruby_hit_callback(kernelEndList[wavefront_id]); + + kernelEndList.erase(wavefront_id); +} + +void +GPUCoalescer::atomicCallback(Addr address, + MachineType mach, + const DataBlock& data) +{ + assert(address == makeLineAddress(address)); + + DPRINTF(GPUCoalescer, "atomic callback for address %#x\n", address); + assert(m_writeRequestTable.count(makeLineAddress(address))); + + RequestTable::iterator i = m_writeRequestTable.find(address); + assert(i != m_writeRequestTable.end()); + GPUCoalescerRequest* srequest = i->second; + + m_writeRequestTable.erase(i); + markRemoved(); + + assert((srequest->m_type == RubyRequestType_ATOMIC) || + (srequest->m_type == RubyRequestType_ATOMIC_RETURN) || + (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN)); + + + // Atomics don't write to cache, so there is no MRU update... 
+ + recordMissLatency(srequest, mach, + srequest->issue_time, Cycles(0), Cycles(0), true, false); + + PacketPtr pkt = srequest->pkt; + Addr request_address = pkt->getAddr(); + Addr request_line_address = makeLineAddress(pkt->getAddr()); + + int len = reqCoalescer[request_line_address].size(); + std::vector<PacketPtr> mylist; + for (int i = 0; i < len; ++i) { + PacketPtr pkt = reqCoalescer[request_line_address][i].first; + assert(srequest->m_type == + reqCoalescer[request_line_address][i].second[PrimaryType]); + request_address = (pkt->getAddr()); + request_line_address = makeLineAddress(request_address); + if (pkt->getPtr<uint8_t>() && + srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) { + /* atomics are done in memory, and return the data *before* the atomic op... */ + memcpy(pkt->getPtr<uint8_t>(), + data.getData(getOffset(request_address), + pkt->getSize()), + pkt->getSize()); + } else { + DPRINTF(MemoryAccess, + "WARNING. Data not transfered from Ruby to M5 for type " \ + "%s\n", + RubyRequestType_to_string(srequest->m_type)); + } + + // If using the RubyTester, update the RubyTester sender state's + // subBlock with the recieved data. The tester will later access + // this state. + // Note: RubyPort will access it's sender state before the + // RubyTester. 
+ if (m_usingRubyTester) { + RubyPort::SenderState *requestSenderState = + safe_cast<RubyPort::SenderState*>(pkt->senderState); + RubyTester::SenderState* testerSenderState = + safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor); + testerSenderState->subBlock.mergeFrom(data); + } + + mylist.push_back(pkt); + } + delete srequest; + reqCoalescer.erase(request_line_address); + assert(!reqCoalescer.count(request_line_address)); + + completeHitCallback(mylist, len); +} + +void +GPUCoalescer::recordCPReadCallBack(MachineID myMachID, MachineID senderMachID) +{ + if(myMachID == senderMachID) { + CP_TCPLdHits++; + } else if(machineIDToMachineType(senderMachID) == MachineType_TCP) { + CP_TCPLdTransfers++; + } else if(machineIDToMachineType(senderMachID) == MachineType_TCC) { + CP_TCCLdHits++; + } else { + CP_LdMiss++; + } +} + +void +GPUCoalescer::recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID) +{ + if(myMachID == senderMachID) { + CP_TCPStHits++; + } else if(machineIDToMachineType(senderMachID) == MachineType_TCP) { + CP_TCPStTransfers++; + } else if(machineIDToMachineType(senderMachID) == MachineType_TCC) { + CP_TCCStHits++; + } else { + CP_StMiss++; + } +} + +void +GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist, int len) +{ + for (int i = 0; i < len; ++i) { + RubyPort::SenderState *ss = + safe_cast<RubyPort::SenderState *>(mylist[i]->senderState); + MemSlavePort *port = ss->port; + assert(port != NULL); + + mylist[i]->senderState = ss->predecessor; + delete ss; + port->hitCallback(mylist[i]); + trySendRetries(); + } + + testDrainComplete(); +} + +PacketPtr +GPUCoalescer::mapAddrToPkt(Addr address) +{ + RequestTable::iterator i = m_readRequestTable.find(address); + assert(i != m_readRequestTable.end()); + GPUCoalescerRequest* request = i->second; + return request->pkt; +} + +void +GPUCoalescer::recordMissLatency(GPUCoalescerRequest* srequest, + MachineType mach, + Cycles initialRequestTime, + Cycles forwardRequestTime, 
+ Cycles firstResponseTime, + bool success, bool isRegion) +{ + RubyRequestType type = srequest->m_type; + Cycles issued_time = srequest->issue_time; + Cycles completion_time = curCycle(); + assert(completion_time >= issued_time); + Cycles total_lat = completion_time - issued_time; + + // cache stats (valid for RfO protocol only) + if (mach == MachineType_TCP) { + if (type == RubyRequestType_LD) { + GPU_TCPLdHits++; + } else { + GPU_TCPStHits++; + } + } else if (mach == MachineType_L1Cache_wCC) { + if (type == RubyRequestType_LD) { + GPU_TCPLdTransfers++; + } else { + GPU_TCPStTransfers++; + } + } else if (mach == MachineType_TCC) { + if (type == RubyRequestType_LD) { + GPU_TCCLdHits++; + } else { + GPU_TCCStHits++; + } + } else { + if (type == RubyRequestType_LD) { + GPU_LdMiss++; + } else { + GPU_StMiss++; + } + } + + // Profile all access latency, even zero latency accesses + m_latencyHist.sample(total_lat); + m_typeLatencyHist[type]->sample(total_lat); + + // Profile the miss latency for all non-zero demand misses + if (total_lat != Cycles(0)) { + m_missLatencyHist.sample(total_lat); + m_missTypeLatencyHist[type]->sample(total_lat); + + if (mach != MachineType_NUM) { + m_missMachLatencyHist[mach]->sample(total_lat); + m_missTypeMachLatencyHist[type][mach]->sample(total_lat); + + if ((issued_time <= initialRequestTime) && + (initialRequestTime <= forwardRequestTime) && + (forwardRequestTime <= firstResponseTime) && + (firstResponseTime <= completion_time)) { + + m_IssueToInitialDelayHist[mach]->sample( + initialRequestTime - issued_time); + m_InitialToForwardDelayHist[mach]->sample( + forwardRequestTime - initialRequestTime); + m_ForwardToFirstResponseDelayHist[mach]->sample( + firstResponseTime - forwardRequestTime); + m_FirstResponseToCompletionDelayHist[mach]->sample( + completion_time - firstResponseTime); + } + } + + } + + DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n", + curTick(), m_version, "Coal", + success ? 
"Done" : "SC_Failed", "", "", + printAddress(srequest->pkt->getAddr()), total_lat); +} + +void +GPUCoalescer::regStats() +{ + // These statistical variables are not for display. + // The profiler will collate these across different + // coalescers and display those collated statistics. + m_outstandReqHist.init(10); + m_latencyHist.init(10); + m_missLatencyHist.init(10); + + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_typeLatencyHist.push_back(new Stats::Histogram()); + m_typeLatencyHist[i]->init(10); + + m_missTypeLatencyHist.push_back(new Stats::Histogram()); + m_missTypeLatencyHist[i]->init(10); + } + + for (int i = 0; i < MachineType_NUM; i++) { + m_missMachLatencyHist.push_back(new Stats::Histogram()); + m_missMachLatencyHist[i]->init(10); + + m_IssueToInitialDelayHist.push_back(new Stats::Histogram()); + m_IssueToInitialDelayHist[i]->init(10); + + m_InitialToForwardDelayHist.push_back(new Stats::Histogram()); + m_InitialToForwardDelayHist[i]->init(10); + + m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram()); + m_ForwardToFirstResponseDelayHist[i]->init(10); + + m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram()); + m_FirstResponseToCompletionDelayHist[i]->init(10); + } + + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>()); + + for (int j = 0; j < MachineType_NUM; j++) { + m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram()); + m_missTypeMachLatencyHist[i][j]->init(10); + } + } + + // GPU cache stats + GPU_TCPLdHits + .name(name() + ".gpu_tcp_ld_hits") + .desc("loads that hit in the TCP") + ; + GPU_TCPLdTransfers + .name(name() + ".gpu_tcp_ld_transfers") + .desc("TCP to TCP load transfers") + ; + GPU_TCCLdHits + .name(name() + ".gpu_tcc_ld_hits") + .desc("loads that hit in the TCC") + ; + GPU_LdMiss + .name(name() + ".gpu_ld_misses") + .desc("loads that miss in the GPU") + ; + + GPU_TCPStHits + .name(name() + ".gpu_tcp_st_hits") + 
.desc("stores that hit in the TCP") + ; + GPU_TCPStTransfers + .name(name() + ".gpu_tcp_st_transfers") + .desc("TCP to TCP store transfers") + ; + GPU_TCCStHits + .name(name() + ".gpu_tcc_st_hits") + .desc("stores that hit in the TCC") + ; + GPU_StMiss + .name(name() + ".gpu_st_misses") + .desc("stores that miss in the GPU") + ; + + // CP cache stats + CP_TCPLdHits + .name(name() + ".cp_tcp_ld_hits") + .desc("loads that hit in the TCP") + ; + CP_TCPLdTransfers + .name(name() + ".cp_tcp_ld_transfers") + .desc("TCP to TCP load transfers") + ; + CP_TCCLdHits + .name(name() + ".cp_tcc_ld_hits") + .desc("loads that hit in the TCC") + ; + CP_LdMiss + .name(name() + ".cp_ld_misses") + .desc("loads that miss in the GPU") + ; + + CP_TCPStHits + .name(name() + ".cp_tcp_st_hits") + .desc("stores that hit in the TCP") + ; + CP_TCPStTransfers + .name(name() + ".cp_tcp_st_transfers") + .desc("TCP to TCP store transfers") + ; + CP_TCCStHits + .name(name() + ".cp_tcc_st_hits") + .desc("stores that hit in the TCC") + ; + CP_StMiss + .name(name() + ".cp_st_misses") + .desc("stores that miss in the GPU") + ; +} diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh new file mode 100644 index 000000000..dbd47059c --- /dev/null +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Sooraj Puthoor + */ + +#ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ +#define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ + +#include <iostream> +#include <unordered_map> + +#include "base/statistics.hh" +#include "mem/protocol/HSAScope.hh" +#include "mem/protocol/HSASegment.hh" +#include "mem/protocol/PrefetchBit.hh" +#include "mem/protocol/RubyAccessMode.hh" +#include "mem/protocol/RubyRequestType.hh" +#include "mem/protocol/SequencerRequestType.hh" +#include "mem/request.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Consumer.hh" +#include "mem/ruby/system/RubyPort.hh" + +class DataBlock; +class CacheMsg; +class MachineID; +class CacheMemory; + +class RubyGPUCoalescerParams; + +HSAScope reqScopeToHSAScope(Request* req); +HSASegment reqSegmentToHSASegment(Request* req); + +struct GPUCoalescerRequest +{ + PacketPtr pkt; + RubyRequestType m_type; + Cycles issue_time; + + GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type, + Cycles _issue_time) + 
: pkt(_pkt), m_type(_m_type), issue_time(_issue_time) + {} +}; + +std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj); + +class GPUCoalescer : public RubyPort +{ + public: + typedef RubyGPUCoalescerParams Params; + GPUCoalescer(const Params *); + ~GPUCoalescer(); + + // Public Methods + void wakeup(); // Used only for deadlock detection + + void printProgress(std::ostream& out) const; + void resetStats(); + void collateStats(); + void regStats(); + + void writeCallback(Addr address, DataBlock& data); + + void writeCallback(Addr address, + MachineType mach, + DataBlock& data); + + void writeCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion); + + void writeCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime); + + void readCallback(Addr address, DataBlock& data); + + void readCallback(Addr address, + MachineType mach, + DataBlock& data); + + void readCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime); + + void readCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion); + /* atomics need their own callback because the data + might be const coming from SLICC */ + void atomicCallback(Addr address, + MachineType mach, + const DataBlock& data); + + void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID); + void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID); + + // Alternate implementations in VIPER Coalescer + virtual RequestStatus makeRequest(PacketPtr pkt); + + int outstandingCount() const { return m_outstanding_count; } + + bool + isDeadlockEventScheduled() const + { + return 
deadlockCheckEvent.scheduled(); + } + + void + descheduleDeadlockEvent() + { + deschedule(deadlockCheckEvent); + } + + bool empty() const; + + void print(std::ostream& out) const; + void checkCoherence(Addr address); + + void markRemoved(); + void removeRequest(GPUCoalescerRequest* request); + void evictionCallback(Addr address); + void completeIssue(); + + void insertKernel(int wavefront_id, PacketPtr pkt); + + void recordRequestType(SequencerRequestType requestType); + Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; } + + Stats::Histogram& getLatencyHist() { return m_latencyHist; } + Stats::Histogram& getTypeLatencyHist(uint32_t t) + { return *m_typeLatencyHist[t]; } + + Stats::Histogram& getMissLatencyHist() + { return m_missLatencyHist; } + Stats::Histogram& getMissTypeLatencyHist(uint32_t t) + { return *m_missTypeLatencyHist[t]; } + + Stats::Histogram& getMissMachLatencyHist(uint32_t t) const + { return *m_missMachLatencyHist[t]; } + + Stats::Histogram& + getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const + { return *m_missTypeMachLatencyHist[r][t]; } + + Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const + { return *m_IssueToInitialDelayHist[t]; } + + Stats::Histogram& + getInitialToForwardDelayHist(const MachineType t) const + { return *m_InitialToForwardDelayHist[t]; } + + Stats::Histogram& + getForwardRequestToFirstResponseHist(const MachineType t) const + { return *m_ForwardToFirstResponseDelayHist[t]; } + + Stats::Histogram& + getFirstResponseToCompletionDelayHist(const MachineType t) const + { return *m_FirstResponseToCompletionDelayHist[t]; } + + // Changed to protected to enable inheritance by VIPER Coalescer + protected: + bool tryCacheAccess(Addr addr, RubyRequestType type, + Addr pc, RubyAccessMode access_mode, + int size, DataBlock*& data_ptr); + // Alternate implementations in VIPER Coalescer + virtual void issueRequest(PacketPtr pkt, RubyRequestType type); + + void kernelCallback(int wavfront_id); + + void 
hitCallback(GPUCoalescerRequest* request, + MachineType mach, + DataBlock& data, + bool success, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion); + void recordMissLatency(GPUCoalescerRequest* request, + MachineType mach, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool success, bool isRegion); + void completeHitCallback(std::vector<PacketPtr> & mylist, int len); + PacketPtr mapAddrToPkt(Addr address); + + + RequestStatus getRequestStatus(PacketPtr pkt, + RubyRequestType request_type); + bool insertRequest(PacketPtr pkt, RubyRequestType request_type); + + bool handleLlsc(Addr address, GPUCoalescerRequest* request); + + // Private copy constructor and assignment operator + GPUCoalescer(const GPUCoalescer& obj); + GPUCoalescer& operator=(const GPUCoalescer& obj); + + class IssueEvent : public Event + { + private: + GPUCoalescer *seq; + public: + IssueEvent(GPUCoalescer *_seq); + void process(); + const char *description() const; + }; + + IssueEvent issueEvent; + + + // Changed to protected to enable inheritance by VIPER Coalescer + protected: + int m_max_outstanding_requests; + int m_deadlock_threshold; + + CacheMemory* m_dataCache_ptr; + CacheMemory* m_instCache_ptr; + + // The cache access latency for this GPU data cache. This is assessed at the + // beginning of each access. This should be very similar to the + // implementation in Sequencer() as this is very much like a Sequencer + Cycles m_data_cache_hit_latency; + + // We need to track both the primary and secondary request types. + // The secondary request type comprises a subset of RubyRequestTypes that + // are understood by the L1 Controller. A primary request type can be any + // RubyRequestType. 
+ enum {PrimaryType, SecondaryType}; + typedef std::pair<PacketPtr, std::vector<RubyRequestType> > RequestDesc; + typedef std::unordered_map<Addr, std::vector<RequestDesc> > CoalescingTable; + CoalescingTable reqCoalescer; + std::vector<Addr> newRequests; + + typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable; + RequestTable m_writeRequestTable; + RequestTable m_readRequestTable; + // Global outstanding request count, across all request tables + int m_outstanding_count; + bool m_deadlock_check_scheduled; + std::unordered_map<int, PacketPtr> kernelEndList; + std::vector<int> newKernelEnds; + + int m_store_waiting_on_load_cycles; + int m_store_waiting_on_store_cycles; + int m_load_waiting_on_store_cycles; + int m_load_waiting_on_load_cycles; + + bool m_usingNetworkTester; + + class GPUCoalescerWakeupEvent : public Event + { + private: + GPUCoalescer *m_GPUCoalescer_ptr; + + public: + GPUCoalescerWakeupEvent(GPUCoalescer *_seq) : + m_GPUCoalescer_ptr(_seq) {} + void process() { m_GPUCoalescer_ptr->wakeup(); } + const char *description() const + { + return "GPUCoalescer deadlock check"; + } + }; + + GPUCoalescerWakeupEvent deadlockCheckEvent; + bool assumingRfOCoherence; + + // m5 style stats for TCP hit/miss counts + Stats::Scalar GPU_TCPLdHits; + Stats::Scalar GPU_TCPLdTransfers; + Stats::Scalar GPU_TCCLdHits; + Stats::Scalar GPU_LdMiss; + + Stats::Scalar GPU_TCPStHits; + Stats::Scalar GPU_TCPStTransfers; + Stats::Scalar GPU_TCCStHits; + Stats::Scalar GPU_StMiss; + + Stats::Scalar CP_TCPLdHits; + Stats::Scalar CP_TCPLdTransfers; + Stats::Scalar CP_TCCLdHits; + Stats::Scalar CP_LdMiss; + + Stats::Scalar CP_TCPStHits; + Stats::Scalar CP_TCPStTransfers; + Stats::Scalar CP_TCCStHits; + Stats::Scalar CP_StMiss; + + //! Histogram for number of outstanding requests per cycle. + Stats::Histogram m_outstandReqHist; + + //! Histogram for holding latency profile of all requests. 
+ Stats::Histogram m_latencyHist; + std::vector<Stats::Histogram *> m_typeLatencyHist; + + //! Histogram for holding latency profile of all requests that + //! miss in the controller connected to this sequencer. + Stats::Histogram m_missLatencyHist; + std::vector<Stats::Histogram *> m_missTypeLatencyHist; + + //! Histograms for profiling the latencies for requests that + //! required external messages. + std::vector<Stats::Histogram *> m_missMachLatencyHist; + std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist; + + //! Histograms for recording the breakdown of miss latency + std::vector<Stats::Histogram *> m_IssueToInitialDelayHist; + std::vector<Stats::Histogram *> m_InitialToForwardDelayHist; + std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist; + std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist; +}; + +inline std::ostream& +operator<<(std::ostream& out, const GPUCoalescer& obj) +{ + obj.print(out); + out << std::flush; + return out; +} + +#endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ + diff --git a/src/mem/ruby/system/GPUCoalescer.py b/src/mem/ruby/system/GPUCoalescer.py new file mode 100644 index 000000000..0c19f875d --- /dev/null +++ b/src/mem/ruby/system/GPUCoalescer.py @@ -0,0 +1,48 @@ +# Copyright (c) 2015 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. 
Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Steve Reinhardt +# Brad Beckmann + +from m5.params import * +from m5.proxy import * +from Sequencer import * + +class RubyGPUCoalescer(RubySequencer): + type = 'RubyGPUCoalescer' + cxx_class = 'GPUCoalescer' + cxx_header = "mem/ruby/system/GPUCoalescer.hh" + + # max_outstanding_requests = (wave front slots) x (wave front size) + max_outstanding_requests = Param.Int(40*64, + "max requests (incl. 
prefetches) outstanding") + assume_rfo = Param.Bool(True, "assume protocol implementes Read for " + "Ownership coherence"); diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index 5a5f528bb..bf4002126 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -60,7 +60,8 @@ RubyPort::RubyPort(const Params *p) memSlavePort(csprintf("%s-mem-slave-port", name()), this, p->ruby_system->getAccessBackingStore(), -1, p->no_retry_on_stall), - gotAddrRanges(p->port_master_connection_count) + gotAddrRanges(p->port_master_connection_count), + m_isCPUSequencer(p->is_cpu_sequencer) { assert(m_version != -1); diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index 07e0fde5a..6bd92b654 100644 --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -167,6 +167,8 @@ class RubyPort : public MemObject uint32_t getId() { return m_version; } DrainState drain() override; + bool isCPUSequencer() { return m_isCPUSequencer; } + protected: void trySendRetries(); void ruby_hit_callback(PacketPtr pkt); @@ -218,6 +220,8 @@ class RubyPort : public MemObject // that should be called when the Sequencer becomes available after a stall. 
// std::vector<MemSlavePort *> retryList; + + bool m_isCPUSequencer; }; #endif // __MEM_RUBY_SYSTEM_RUBYPORT_HH__ diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc index 1ecd2e098..e1717e519 100644 --- a/src/mem/ruby/system/RubySystem.cc +++ b/src/mem/ruby/system/RubySystem.cc @@ -107,7 +107,7 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace, Sequencer* sequencer_ptr = NULL; for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { - sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); + sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getCPUSequencer()); if (sequencer_ptr == NULL) { sequencer_ptr = sequencer_map[cntrl]; } diff --git a/src/mem/ruby/system/SConscript b/src/mem/ruby/system/SConscript index 8c5077362..b67311bca 100644 --- a/src/mem/ruby/system/SConscript +++ b/src/mem/ruby/system/SConscript @@ -33,12 +33,22 @@ Import('*') if env['PROTOCOL'] == 'None': Return() +if env['BUILD_GPU']: + SimObject('GPUCoalescer.py') SimObject('RubySystem.py') SimObject('Sequencer.py') +SimObject('WeightedLRUReplacementPolicy.py') +if env['BUILD_GPU']: + SimObject('VIPERCoalescer.py') Source('CacheRecorder.cc') Source('DMASequencer.cc') +if env['BUILD_GPU']: + Source('GPUCoalescer.cc') Source('RubyPort.cc') Source('RubyPortProxy.cc') Source('RubySystem.cc') Source('Sequencer.cc') +if env['BUILD_GPU']: + Source('VIPERCoalescer.cc') +Source('WeightedLRUPolicy.cc') diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 50418c700..c2727b41d 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -63,6 +63,7 @@ Sequencer::Sequencer(const Params *p) m_max_outstanding_requests = p->max_outstanding_requests; m_deadlock_threshold = p->deadlock_threshold; + m_coreId = p->coreid; // for tracking the two CorePair sequencers assert(m_max_outstanding_requests > 0); assert(m_deadlock_threshold > 0); assert(m_instCache_ptr != NULL); @@ -593,6 +594,8 @@ 
Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) ContextID proc_id = pkt->req->hasContextId() ? pkt->req->contextId() : InvalidContextID; + ContextID core_id = coreId(); + // If valid, copy the pc to the ruby request Addr pc = 0; if (pkt->req->hasPC()) { @@ -607,7 +610,7 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) nullptr : pkt->getPtr<uint8_t>(), pkt->getSize(), pc, secondary_type, RubyAccessMode_Supervisor, pkt, - PrefetchBit_No, proc_id); + PrefetchBit_No, proc_id, core_id); DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\n", curTick(), m_version, "Seq", "Begin", "", "", diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 47af7ea1e..2a2f49587 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -99,6 +99,7 @@ class Sequencer : public RubyPort void markRemoved(); void evictionCallback(Addr address); void invalidateSC(Addr address); + int coreId() const { return m_coreId; } void recordRequestType(SequencerRequestType requestType); Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; } @@ -198,6 +199,8 @@ class Sequencer : public RubyPort Stats::Scalar m_load_waiting_on_store; Stats::Scalar m_load_waiting_on_load; + int m_coreId; + bool m_usingNetworkTester; //! Histogram for number of outstanding requests per cycle. 
diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index 7c90eb29c..d6ee0aa2f 100644 --- a/src/mem/ruby/system/Sequencer.py +++ b/src/mem/ruby/system/Sequencer.py @@ -32,54 +32,58 @@ from m5.proxy import * from MemObject import MemObject class RubyPort(MemObject): - type = 'RubyPort' - abstract = True - cxx_header = "mem/ruby/system/RubyPort.hh" - version = Param.Int(0, "") + type = 'RubyPort' + abstract = True + cxx_header = "mem/ruby/system/RubyPort.hh" + version = Param.Int(0, "") - slave = VectorSlavePort("CPU slave port") - master = VectorMasterPort("CPU master port") - pio_master_port = MasterPort("Ruby mem master port") - mem_master_port = MasterPort("Ruby mem master port") - pio_slave_port = SlavePort("Ruby pio slave port") - mem_slave_port = SlavePort("Ruby memory port") + slave = VectorSlavePort("CPU slave port") + master = VectorMasterPort("CPU master port") + pio_master_port = MasterPort("Ruby mem master port") + mem_master_port = MasterPort("Ruby mem master port") + pio_slave_port = SlavePort("Ruby pio slave port") + mem_slave_port = SlavePort("Ruby memory port") - using_ruby_tester = Param.Bool(False, "") - no_retry_on_stall = Param.Bool(False, "") - ruby_system = Param.RubySystem(Parent.any, "") - system = Param.System(Parent.any, "system object") - support_data_reqs = Param.Bool(True, "data cache requests supported") - support_inst_reqs = Param.Bool(True, "inst cache requests supported") + using_ruby_tester = Param.Bool(False, "") + no_retry_on_stall = Param.Bool(False, "") + ruby_system = Param.RubySystem(Parent.any, "") + system = Param.System(Parent.any, "system object") + support_data_reqs = Param.Bool(True, "data cache requests supported") + support_inst_reqs = Param.Bool(True, "inst cache requests supported") + is_cpu_sequencer = Param.Bool(True, "connected to a cpu") class RubyPortProxy(RubyPort): - type = 'RubyPortProxy' - cxx_header = "mem/ruby/system/RubyPortProxy.hh" + type = 'RubyPortProxy' + cxx_header = 
"mem/ruby/system/RubyPortProxy.hh" class RubySequencer(RubyPort): - type = 'RubySequencer' - cxx_class = 'Sequencer' - cxx_header = "mem/ruby/system/Sequencer.hh" + type = 'RubySequencer' + cxx_class = 'Sequencer' + cxx_header = "mem/ruby/system/Sequencer.hh" - icache = Param.RubyCache("") - dcache = Param.RubyCache("") - # Cache latencies currently assessed at the beginning of each access - # NOTE: Setting these values to a value greater than one will result in - # O3 CPU pipeline bubbles and negatively impact performance - # TODO: Latencies should be migrated into each top-level cache controller - icache_hit_latency = Param.Cycles(1, "Inst cache hit latency") - dcache_hit_latency = Param.Cycles(1, "Data cache hit latency") - max_outstanding_requests = Param.Int(16, - "max requests (incl. prefetches) outstanding") - deadlock_threshold = Param.Cycles(500000, - "max outstanding cycles for a request before deadlock/livelock declared") - using_network_tester = Param.Bool(False, "") + icache = Param.RubyCache("") + dcache = Param.RubyCache("") + # Cache latencies currently assessed at the beginning of each access + # NOTE: Setting these values to a value greater than one will result in + # O3 CPU pipeline bubbles and negatively impact performance + # TODO: Latencies should be migrated into each top-level cache controller + icache_hit_latency = Param.Cycles(1, "Inst cache hit latency") + dcache_hit_latency = Param.Cycles(1, "Data cache hit latency") + max_outstanding_requests = Param.Int(16, + "max requests (incl. 
prefetches) outstanding") + deadlock_threshold = Param.Cycles(500000, + "max outstanding cycles for a request before deadlock/livelock declared") + using_network_tester = Param.Bool(False, "") + # id used by protocols that support multiple sequencers per controller + # 99 is the dummy default value + coreid = Param.Int(99, "CorePair core id") class DMASequencer(MemObject): - type = 'DMASequencer' - cxx_header = "mem/ruby/system/DMASequencer.hh" + type = 'DMASequencer' + cxx_header = "mem/ruby/system/DMASequencer.hh" - version = Param.Int(0, "") - slave = SlavePort("Device slave port") - using_ruby_tester = Param.Bool(False, "") - ruby_system = Param.RubySystem(Parent.any, "") - system = Param.System(Parent.any, "system object") + version = Param.Int(0, "") + slave = SlavePort("Device slave port") + using_ruby_tester = Param.Bool(False, "") + ruby_system = Param.RubySystem(Parent.any, "") + system = Param.System(Parent.any, "system object") diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc new file mode 100644 index 000000000..ca91f2723 --- /dev/null +++ b/src/mem/ruby/system/VIPERCoalescer.cc @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Sooraj Puthoor + */ + +#include "base/misc.hh" +#include "base/str.hh" +#include "config/the_isa.hh" + +#if THE_ISA == X86_ISA +#include "arch/x86/insts/microldstop.hh" + +#endif // X86_ISA +#include "mem/ruby/system/VIPERCoalescer.hh" + +#include "cpu/testers/rubytest/RubyTester.hh" +#include "debug/GPUCoalescer.hh" +#include "debug/MemoryAccess.hh" +#include "mem/packet.hh" +#include "mem/ruby/common/SubBlock.hh" +#include "mem/ruby/network/MessageBuffer.hh" +#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/slicc_interface/AbstractController.hh" +#include "mem/ruby/slicc_interface/RubyRequest.hh" +#include "mem/ruby/structures/CacheMemory.hh" +#include "mem/ruby/system/GPUCoalescer.hh" +#include "mem/ruby/system/RubySystem.hh" +#include "params/VIPERCoalescer.hh" + +using namespace std; + +VIPERCoalescer * +VIPERCoalescerParams::create() +{ + return new VIPERCoalescer(this); +} + +VIPERCoalescer::VIPERCoalescer(const Params *p) + : GPUCoalescer(p) +{ + 
m_max_wb_per_cycle=p->max_wb_per_cycle; + m_max_inv_per_cycle=p->max_inv_per_cycle; + m_outstanding_inv = 0; + m_outstanding_wb = 0; +} + +VIPERCoalescer::~VIPERCoalescer() +{ +} + +// Analyzes the packet to see if this request can be coalesced. +// If request can be coalesced, this request is added to the reqCoalescer table +// and makeRequest returns RequestStatus_Issued; +// If this is the first request to a cacheline, request is added to both +// newRequests queue and to the reqCoalescer table; makeRequest +// returns RequestStatus_Issued. +// If there is a pending request to this cacheline and this request +// can't be coalesced, RequestStatus_Aliased is returned and +// the packet needs to be reissued. +RequestStatus +VIPERCoalescer::makeRequest(PacketPtr pkt) +{ + if (m_outstanding_wb | m_outstanding_inv) { + DPRINTF(GPUCoalescer, + "There are %d Writebacks and %d Invalidatons\n", + m_outstanding_wb, m_outstanding_inv); + } + // Are we in the middle of a release + if ((m_outstanding_wb) > 0) { + if (pkt->req->isKernel()) { + // Everythign is fine + // Barriers and Kernel End scan coalesce + // If it is a Kerenl Begin flush the cache + if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) { + invL1(); + } + + if (pkt->req->isRelease()) { + insertKernel(pkt->req->contextId(), pkt); + } + + return RequestStatus_Issued; + } +// return RequestStatus_Aliased; + } else if (pkt->req->isKernel() && pkt->req->isRelease()) { + // Flush Dirty Data on Kernel End + // isKernel + isRelease + insertKernel(pkt->req->contextId(), pkt); + wbL1(); + if(m_outstanding_wb == 0) { + for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) { + newKernelEnds.push_back(it->first); + } + completeIssue(); + } + return RequestStatus_Issued; + } + RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt); + if (requestStatus!=RequestStatus_Issued) { + // Request not isssued + // enqueue Retry + DPRINTF(GPUCoalescer, "Request not issued by GPUCoaleser\n"); + return 
requestStatus; + } else if (pkt->req->isKernel() && pkt->req->isAcquire()) { + // Invalidate clean Data on Kernel Begin + // isKernel + isAcquire + invL1(); + } else if (pkt->req->isAcquire() && pkt->req->isRelease()) { + // Deschedule the AtomicAcqRel and + // Flush and Invalidate the L1 cache + invwbL1(); + if (m_outstanding_wb > 0 && issueEvent.scheduled()) { + DPRINTF(GPUCoalescer, "issueEvent Descheduled\n"); + deschedule(issueEvent); + } + } else if (pkt->req->isRelease()) { + // Deschedule the StoreRel and + // Flush the L1 cache + wbL1(); + if (m_outstanding_wb > 0 && issueEvent.scheduled()) { + DPRINTF(GPUCoalescer, "issueEvent Descheduled\n"); + deschedule(issueEvent); + } + } else if (pkt->req->isAcquire()) { + // LoadAcq or AtomicAcq + // Invalidate the L1 cache + invL1(); + } + // Request was successful + if (m_outstanding_wb == 0) { + if (!issueEvent.scheduled()) { + DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n"); + schedule(issueEvent, curTick()); + } + } + return RequestStatus_Issued; +} + +void +VIPERCoalescer::wbCallback(Addr addr) +{ + m_outstanding_wb--; + // if L1 Flush Complete + // attemnpt to schedule issueEvent + assert(((int) m_outstanding_wb) >= 0); + if (m_outstanding_wb == 0) { + for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) { + newKernelEnds.push_back(it->first); + } + completeIssue(); + } + trySendRetries(); +} + +void +VIPERCoalescer::invCallback(Addr addr) +{ + m_outstanding_inv--; + // if L1 Flush Complete + // attemnpt to schedule issueEvent + // This probably won't happen, since + // we dont wait on cache invalidations + if (m_outstanding_wb == 0) { + for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) { + newKernelEnds.push_back(it->first); + } + completeIssue(); + } + trySendRetries(); +} + +/** + * Invalidate L1 cache (Acquire) + */ +void +VIPERCoalescer::invL1() +{ + int size = m_dataCache_ptr->getNumBlocks(); + DPRINTF(GPUCoalescer, + "There are %d Invalidations outstanding 
before Cache Walk\n", + m_outstanding_inv); + // Walk the cache + for (int i = 0; i < size; i++) { + Addr addr = m_dataCache_ptr->getAddressAtIdx(i); + // Evict Read-only data + std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( + clockEdge(), addr, (uint8_t*) 0, 0, 0, + RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor, + nullptr); + assert(m_mandatory_q_ptr != NULL); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); + m_outstanding_inv++; + } + DPRINTF(GPUCoalescer, + "There are %d Invalidatons outstanding after Cache Walk\n", + m_outstanding_inv); +} + +/** + * Writeback L1 cache (Release) + */ +void +VIPERCoalescer::wbL1() +{ + int size = m_dataCache_ptr->getNumBlocks(); + DPRINTF(GPUCoalescer, + "There are %d Writebacks outstanding before Cache Walk\n", + m_outstanding_wb); + // Walk the cache + for (int i = 0; i < size; i++) { + Addr addr = m_dataCache_ptr->getAddressAtIdx(i); + // Write dirty data back + std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( + clockEdge(), addr, (uint8_t*) 0, 0, 0, + RubyRequestType_FLUSH, RubyAccessMode_Supervisor, + nullptr); + assert(m_mandatory_q_ptr != NULL); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); + m_outstanding_wb++; + } + DPRINTF(GPUCoalescer, + "There are %d Writebacks outstanding after Cache Walk\n", + m_outstanding_wb); +} + +/** + * Invalidate and Writeback L1 cache (Acquire&Release) + */ +void +VIPERCoalescer::invwbL1() +{ + int size = m_dataCache_ptr->getNumBlocks(); + // Walk the cache + for(int i = 0; i < size; i++) { + Addr addr = m_dataCache_ptr->getAddressAtIdx(i); + // Evict Read-only data + std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( + clockEdge(), addr, (uint8_t*) 0, 0, 0, + RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor, + nullptr); + assert(m_mandatory_q_ptr != NULL); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); + m_outstanding_inv++; + } + // Walk the 
cache + for(int i = 0; i< size; i++) { + Addr addr = m_dataCache_ptr->getAddressAtIdx(i); + // Write dirty data back + std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( + clockEdge(), addr, (uint8_t*) 0, 0, 0, + RubyRequestType_FLUSH, RubyAccessMode_Supervisor, + nullptr); + assert(m_mandatory_q_ptr != NULL); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); + m_outstanding_wb++; + } +} diff --git a/src/mem/ruby/system/VIPERCoalescer.hh b/src/mem/ruby/system/VIPERCoalescer.hh new file mode 100644 index 000000000..af6e44e7f --- /dev/null +++ b/src/mem/ruby/system/VIPERCoalescer.hh @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Sooraj Puthoor + */ + +#ifndef __MEM_RUBY_SYSTEM_VI_COALESCER_HH__ +#define __MEM_RUBY_SYSTEM_VI_COALESCER_HH__ + +#include <iostream> + +#include "mem/protocol/PrefetchBit.hh" +#include "mem/protocol/RubyAccessMode.hh" +#include "mem/protocol/RubyRequestType.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Consumer.hh" +#include "mem/ruby/system/GPUCoalescer.hh" +#include "mem/ruby/system/RubyPort.hh" + +class DataBlock; +class CacheMsg; +class MachineID; +class CacheMemory; + +class VIPERCoalescerParams; + +class VIPERCoalescer : public GPUCoalescer +{ + public: + typedef VIPERCoalescerParams Params; + VIPERCoalescer(const Params *); + ~VIPERCoalescer(); + void wbCallback(Addr address); + void invCallback(Addr address); + RequestStatus makeRequest(PacketPtr pkt); + private: + void invL1(); + void wbL1(); + void invwbL1(); + uint64_t m_outstanding_inv; + uint64_t m_outstanding_wb; + uint64_t m_max_inv_per_cycle; + uint64_t m_max_wb_per_cycle; +}; +#endif // __MEM_RUBY_SYSTEM_VI_COALESCER_HH__ + diff --git a/src/mem/ruby/system/VIPERCoalescer.py b/src/mem/ruby/system/VIPERCoalescer.py new file mode 100644 index 000000000..05c74386f --- /dev/null +++ b/src/mem/ruby/system/VIPERCoalescer.py @@ -0,0 +1,45 @@ +# Copyright (c) 2015 Advanced Micro Devices, Inc. +# All rights reserved. 
+# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Steve Reinhardt +# Brad Beckmann + +from m5.params import * +from m5.proxy import * +from GPUCoalescer import * + +class VIPERCoalescer(RubyGPUCoalescer): + type = 'VIPERCoalescer' + cxx_class = 'VIPERCoalescer' + cxx_header = "mem/ruby/system/VIPERCoalescer.hh" + max_inv_per_cycle = Param.Int(32, "max invalidations per cycle") + max_wb_per_cycle = Param.Int(32, "max writebacks per cycle") + assume_rfo = False diff --git a/src/mem/ruby/system/WeightedLRUPolicy.cc b/src/mem/ruby/system/WeightedLRUPolicy.cc new file mode 100644 index 000000000..5baa4d9a5 --- /dev/null +++ b/src/mem/ruby/system/WeightedLRUPolicy.cc @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Derek Hower + */ + +#include "mem/ruby/system/WeightedLRUPolicy.hh" + +WeightedLRUPolicy::WeightedLRUPolicy(const Params* p) + : AbstractReplacementPolicy(p), m_cache(p->cache) +{ + m_last_occ_ptr = new int*[m_num_sets]; + for(unsigned i = 0; i < m_num_sets; i++){ + m_last_occ_ptr[i] = new int[m_assoc]; + for(unsigned j = 0; j < m_assoc; j++){ + m_last_occ_ptr[i][j] = 0; + } + } +} + +WeightedLRUPolicy * +WeightedLRUReplacementPolicyParams::create() +{ + return new WeightedLRUPolicy(this); +} + +WeightedLRUPolicy::~WeightedLRUPolicy() +{ + if (m_last_occ_ptr != NULL){ + for (unsigned i = 0; i < m_num_sets; i++){ + if (m_last_occ_ptr[i] != NULL){ + delete[] m_last_occ_ptr[i]; + } + } + delete[] m_last_occ_ptr; + } +} + +void +WeightedLRUPolicy::touch(int64_t set, int64_t index, Tick time) +{ + assert(index >= 0 && index < m_assoc); + assert(set >= 0 && set < m_num_sets); + + m_last_ref_ptr[set][index] = time; +} + +void +WeightedLRUPolicy::touch(int64_t set, int64_t index, Tick time, int occupancy) +{ + assert(index >= 0 && index < m_assoc); + assert(set >= 0 && set < m_num_sets); + + m_last_ref_ptr[set][index] = time; + m_last_occ_ptr[set][index] = occupancy; +} + +int64_t +WeightedLRUPolicy::getVictim(int64_t set) const +{ + Tick time, smallest_time; + int64_t smallest_index; + + smallest_index = 0; + smallest_time = m_last_ref_ptr[set][0]; + int smallest_weight = m_last_ref_ptr[set][0]; + + for (unsigned i = 1; i < 
m_assoc; i++) { + + int weight = m_last_occ_ptr[set][i]; + if (weight < smallest_weight) { + smallest_weight = weight; + smallest_index = i; + smallest_time = m_last_ref_ptr[set][i]; + } else if (weight == smallest_weight) { + time = m_last_ref_ptr[set][i]; + if (time < smallest_time) { + smallest_index = i; + smallest_time = time; + } + } + } + return smallest_index; +} diff --git a/src/mem/ruby/system/WeightedLRUPolicy.hh b/src/mem/ruby/system/WeightedLRUPolicy.hh new file mode 100644 index 000000000..3150779b2 --- /dev/null +++ b/src/mem/ruby/system/WeightedLRUPolicy.hh @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Sooraj Puthoor + */ + +#ifndef __MEM_RUBY_SYSTEM_WEIGHTEDLRUPOLICY_HH__ +#define __MEM_RUBY_SYSTEM_WEIGHTEDLRUPOLICY_HH__ + +#include "mem/ruby/structures/AbstractReplacementPolicy.hh" +#include "mem/ruby/structures/CacheMemory.hh" +#include "params/WeightedLRUReplacementPolicy.hh" + +/* Simple true LRU replacement policy */ + +class WeightedLRUPolicy : public AbstractReplacementPolicy +{ + public: + typedef WeightedLRUReplacementPolicyParams Params; + WeightedLRUPolicy(const Params* p); + ~WeightedLRUPolicy(); + + void touch(int64_t set, int64_t way, Tick time); + void touch(int64_t set, int64_t way, Tick time, int occupancy); + int64_t getVictim(int64_t set) const override; + + bool useOccupancy() const { return true; } + + CacheMemory * m_cache; + int **m_last_occ_ptr; +}; + +#endif // __MEM_RUBY_SYSTEM_WeightedLRUPolicy_HH__ diff --git a/src/mem/ruby/system/WeightedLRUReplacementPolicy.py b/src/mem/ruby/system/WeightedLRUReplacementPolicy.py new file mode 100644 index 000000000..e7de33496 --- /dev/null +++ b/src/mem/ruby/system/WeightedLRUReplacementPolicy.py @@ -0,0 +1,45 @@ +# +# Copyright (c) 2013-2015 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. 
Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+# +# Author: Derek Hower +# + +from m5.params import * +from m5.proxy import * +from MemObject import MemObject +from ReplacementPolicy import ReplacementPolicy + +class WeightedLRUReplacementPolicy(ReplacementPolicy): + type = "WeightedLRUReplacementPolicy" + cxx_class = "WeightedLRUPolicy" + cxx_header = "mem/ruby/system/WeightedLRUPolicy.hh" + cache = Param.RubyCache("") diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index a530307ee..fc3f32c3d 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -35,13 +35,17 @@ import re python_class_map = { "int": "Int", + "NodeID": "Int", "uint32_t" : "UInt32", "std::string": "String", "bool": "Bool", "CacheMemory": "RubyCache", "WireBuffer": "RubyWireBuffer", "Sequencer": "RubySequencer", + "GPUCoalescer" : "RubyGPUCoalescer", + "VIPERCoalescer" : "VIPERCoalescer", "DirectoryMemory": "RubyDirectoryMemory", + "PerfectCacheMemory": "RubyPerfectCacheMemory", "MemoryControl": "MemoryControl", "MessageBuffer": "MessageBuffer", "DMASequencer": "DMASequencer", @@ -305,7 +309,7 @@ class $c_ident : public AbstractController void collateStats(); void recordCacheTrace(int cntrl, CacheRecorder* tr); - Sequencer* getSequencer() const; + Sequencer* getCPUSequencer() const; int functionalWriteBuffers(PacketPtr&); @@ -527,8 +531,14 @@ $c_ident::$c_ident(const Params *p) else: code('m_${{param.ident}} = p->${{param.ident}};') - if re.compile("sequencer").search(param.ident): - code('m_${{param.ident}}_ptr->setController(this);') + if re.compile("sequencer").search(param.ident) or \ + param.type_ast.type.c_ident == "GPUCoalescer" or \ + param.type_ast.type.c_ident == "VIPERCoalescer": + code(''' +if (m_${{param.ident}}_ptr != NULL) { + m_${{param.ident}}_ptr->setController(this); +} +''') code(''' @@ -670,6 +680,28 @@ $c_ident::init() assert(param.pointer) seq_ident = "m_%s_ptr" % param.ident + if seq_ident != "NULL": + code(''' +Sequencer* 
+$c_ident::getCPUSequencer() const +{ + if (NULL != $seq_ident && $seq_ident->isCPUSequencer()) { + return $seq_ident; + } else { + return NULL; + } +} +''') + else: + code(''' + +Sequencer* +$c_ident::getCPUSequencer() const +{ + return NULL; +} +''') + code(''' void @@ -796,12 +828,6 @@ $c_ident::getMemoryQueue() const return $memq_ident; } -Sequencer* -$c_ident::getSequencer() const -{ - return $seq_ident; -} - void $c_ident::print(ostream& out) const { |