author    Tony Gutierrez <anthony.gutierrez@amd.com>  2016-01-19 14:28:22 -0500
committer Tony Gutierrez <anthony.gutierrez@amd.com>  2016-01-19 14:28:22 -0500
commit    1a7d3f9fcb76a68540dd948f91413533a383bfde (patch)
tree      867510a147cd095f19499d26b7c02d27de4cae9d /src/mem
parent    28e353e0403ea379d244a418e8dc8ee0b48187cf (diff)
download  gem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz
gpu-compute: AMD's baseline GPU model
Diffstat (limited to 'src/mem')
-rw-r--r--  src/mem/protocol/GPU_RfO-SQC.sm  667
-rw-r--r--  src/mem/protocol/GPU_RfO-TCC.sm  1199
-rw-r--r--  src/mem/protocol/GPU_RfO-TCCdir.sm  2672
-rw-r--r--  src/mem/protocol/GPU_RfO-TCP.sm  1009
-rw-r--r--  src/mem/protocol/GPU_RfO.slicc  11
-rw-r--r--  src/mem/protocol/GPU_VIPER-SQC.sm  322
-rw-r--r--  src/mem/protocol/GPU_VIPER-TCC.sm  739
-rw-r--r--  src/mem/protocol/GPU_VIPER-TCP.sm  747
-rw-r--r--  src/mem/protocol/GPU_VIPER.slicc  9
-rw-r--r--  src/mem/protocol/GPU_VIPER_Baseline.slicc  9
-rw-r--r--  src/mem/protocol/GPU_VIPER_Region-TCC.sm  773
-rw-r--r--  src/mem/protocol/GPU_VIPER_Region.slicc  11
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base-CorePair.sm  2904
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base-L3cache.sm  1130
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm  3009
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base-Region-dir.sm  2038
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base-Region-msg.sm  291
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm  1368
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base-RegionDir.sm  1187
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base-dir.sm  1137
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base-msg.sm  362
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base-probeFilter.sm  1408
-rw-r--r--  src/mem/protocol/MOESI_AMD_Base.slicc  6
-rw-r--r--  src/mem/protocol/RubySlicc_ComponentMapping.sm  3
-rw-r--r--  src/mem/protocol/RubySlicc_Exports.sm  11
-rw-r--r--  src/mem/protocol/RubySlicc_Types.sm  45
-rw-r--r--  src/mem/protocol/SConsopts  5
-rw-r--r--  src/mem/ruby/SConscript  15
-rw-r--r--  src/mem/ruby/profiler/Profiler.cc  4
-rw-r--r--  src/mem/ruby/slicc_interface/AbstractCacheEntry.hh  6
-rw-r--r--  src/mem/ruby/slicc_interface/AbstractController.cc  6
-rw-r--r--  src/mem/ruby/slicc_interface/AbstractController.hh  3
-rw-r--r--  src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh  29
-rw-r--r--  src/mem/ruby/structures/CacheMemory.cc  50
-rw-r--r--  src/mem/ruby/structures/CacheMemory.hh  5
-rw-r--r--  src/mem/ruby/structures/RubyCache.py  1
-rw-r--r--  src/mem/ruby/system/GPUCoalescer.cc  1397
-rw-r--r--  src/mem/ruby/system/GPUCoalescer.hh  368
-rw-r--r--  src/mem/ruby/system/GPUCoalescer.py  48
-rw-r--r--  src/mem/ruby/system/RubyPort.cc  3
-rw-r--r--  src/mem/ruby/system/RubyPort.hh  4
-rw-r--r--  src/mem/ruby/system/RubySystem.cc  2
-rw-r--r--  src/mem/ruby/system/SConscript  10
-rw-r--r--  src/mem/ruby/system/Sequencer.cc  5
-rw-r--r--  src/mem/ruby/system/Sequencer.hh  3
-rw-r--r--  src/mem/ruby/system/Sequencer.py  86
-rw-r--r--  src/mem/ruby/system/VIPERCoalescer.cc  287
-rw-r--r--  src/mem/ruby/system/VIPERCoalescer.hh  75
-rw-r--r--  src/mem/ruby/system/VIPERCoalescer.py  45
-rw-r--r--  src/mem/ruby/system/WeightedLRUPolicy.cc  113
-rw-r--r--  src/mem/ruby/system/WeightedLRUPolicy.hh  62
-rw-r--r--  src/mem/ruby/system/WeightedLRUReplacementPolicy.py  45
-rw-r--r--  src/mem/slicc/symbols/StateMachine.py  44
53 files changed, 25713 insertions, 75 deletions
diff --git a/src/mem/protocol/GPU_RfO-SQC.sm b/src/mem/protocol/GPU_RfO-SQC.sm
new file mode 100644
index 000000000..1e5f8df74
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO-SQC.sm
@@ -0,0 +1,667 @@
+/*
+ * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
+ : Sequencer* sequencer;
+ CacheMemory * L1cache;
+ int TCC_select_num_bits;
+ Cycles issue_latency := 80; // time to send data down to TCC
+ Cycles l2_hit_latency := 18;
+
+ MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request";
+ MessageBuffer * responseFromSQC, network="To", virtual_network="3", vnet_type="response";
+ MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";
+
+ MessageBuffer * probeToSQC, network="From", virtual_network="1", vnet_type="request";
+ MessageBuffer * responseToSQC, network="From", virtual_network="3", vnet_type="response";
+
+ MessageBuffer * mandatoryQueue;
+{
+ state_declaration(State, desc="SQC Cache States", default="SQC_State_I") {
+ I, AccessPermission:Invalid, desc="Invalid";
+ S, AccessPermission:Read_Only, desc="Shared";
+
+ I_S, AccessPermission:Busy, desc="Invalid, issued RdBlkS, have not seen response yet";
+ S_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack";
+ I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCCdir for canceled WB";
+ }
+
+ enumeration(Event, desc="SQC Events") {
+ // Core initiated
+ Fetch, desc="Fetch";
+
+ //TCC initiated
+ TCC_AckS, desc="TCC Ack to Core Request";
+ TCC_AckWB, desc="TCC Ack for WB";
+ TCC_NackWB, desc="TCC Nack for WB";
+
+ // Mem sys initiated
+ Repl, desc="Replacing block from cache";
+
+ // Probe Events
+ PrbInvData, desc="probe, return M data";
+ PrbInv, desc="probe, no need for data";
+ PrbShrData, desc="probe downgrade, return data";
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ DataArrayRead, desc="Read the data array";
+ DataArrayWrite, desc="Write the data array";
+ TagArrayRead, desc="Read the tag array";
+ TagArrayWrite, desc="Write the tag array";
+ }
+
+
+ structure(Entry, desc="...", interface="AbstractCacheEntry") {
+ State CacheState, desc="cache state";
+ bool Dirty, desc="Is the data dirty (different from memory)?";
+ DataBlock DataBlk, desc="data for the block";
+ bool FromL2, default="false", desc="block just moved from L2";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
+ bool Dirty, desc="Is the data dirty (different than memory)?";
+ int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
+ bool Shared, desc="Victim hit by shared probe";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<SQC_TBE>", constructor="m_number_of_TBEs";
+ int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ // Internal functions
+ Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+ Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
+ return cache_entry;
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return tbe.DataBlk;
+ } else {
+ return getCacheEntry(addr).DataBlk;
+ }
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if(is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
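
A note on the lookup order above, with a sketch that is not part of this patch: the TBE for an in-flight transaction takes precedence over the cached copy, and a block with neither is treated as Invalid. In Python, with dicts standing in for the SLICC TBE/Entry structures:

    def get_state(tbe, cache_entry):
        if tbe is not None:                  # in-flight transaction wins
            return tbe["TBEState"]
        if cache_entry is not None:          # otherwise trust the tag array
            return cache_entry["CacheState"]
        return "I"                           # not present anywhere
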
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+ }
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return SQC_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return SQC_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(SQC_State_to_permission(state));
+ }
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
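
The {TagArrayRead, DataArrayWrite, ...} sets attached to the transitions further below are resource claims: recordRequestType logs the access for cache statistics, while checkResourceAvailable gates a transition on the claimed array ports being free in the current cycle. A minimal sketch of that gating, not part of this patch and with hypothetical names:

    def can_fire(claims, check_resource_available):
        # claims: e.g. ["TagArrayRead", "DataArrayWrite"]
        return all(check_resource_available(c) for c in claims)
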
+
+ // Out Ports
+
+ out_port(requestNetwork_out, CPURequestMsg, requestFromSQC);
+ out_port(responseNetwork_out, ResponseMsg, responseFromSQC);
+ out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
+
+ // In Ports
+
+ in_port(probeNetwork_in, TDProbeRequestMsg, probeToSQC) {
+ if (probeNetwork_in.isReady(clockEdge())) {
+ peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == ProbeRequestType:PrbInv) {
+ if (in_msg.ReturnData) {
+ trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+ assert(in_msg.ReturnData);
+ trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+ in_port(responseToSQC_in, ResponseMsg, responseToSQC) {
+ if (responseToSQC_in.isReady(clockEdge())) {
+ peek(responseToSQC_in, ResponseMsg, block_on="addr") {
+
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == CoherenceResponseType:TDSysResp) {
+ if (in_msg.State == CoherenceState:Shared) {
+ trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("SQC should not receive TDSysResp other than CoherenceState:Shared");
+ }
+ } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
+ trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) {
+ trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unexpected Response Message to Core");
+ }
+ }
+ }
+ }
+
+ in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+ if (mandatoryQueue_in.isReady(clockEdge())) {
+ peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+ Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+ TBE tbe := TBEs.lookup(in_msg.LineAddress);
+
+ assert(in_msg.Type == RubyRequestType:IFETCH);
+ if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
+ trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe);
+ } else {
+ Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ }
+ }
+ }
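
The mandatory-queue port above follows Ruby's usual allocate-or-evict pattern: an IFETCH proceeds if the line is resident or a way is free; otherwise the replacement victim is handled first and the fetch retries. A sketch of the decision, not part of this patch; cache_avail and cache_probe are stand-ins for L1cache.cacheAvail and L1cache.cacheProbe:

    def classify_fetch(line_addr, tags, cache_avail, cache_probe):
        if line_addr in tags or cache_avail(line_addr):
            return ("Fetch", line_addr)    # hit, or a free way in the set
        victim = cache_probe(line_addr)    # replacement candidate
        return ("Repl", victim)            # evict first; the fetch retries
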
+
+ // Actions
+
+ action(ic_invCache, "ic", desc="invalidate cache") {
+ if(is_valid(cache_entry)) {
+ L1cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlkS;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
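
mapAddressToRange steers each request to one of the 2^TCC_select_num_bits TCCdir banks by slicing address bits just above the block offset (TCC_select_low_bit defaults to RubySystem::getBlockSizeBits()). A sketch of the arithmetic, not part of this patch, assuming 64-byte blocks and four banks:

    def tcc_bank(address, low_bit=6, num_bits=2):
        return (address >> low_bit) & ((1 << num_bits) - 1)

    assert tcc_bank(0x1000) == 0   # block number 0x40 maps to bank 0
    assert tcc_bank(0x1040) == 1   # the next 64 B block maps to bank 1
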
+
+ action(vc_victim, "vc", desc="Victimize E/S Data") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicClean;
+ out_msg.InitialRequestTime := curCycle();
+ if (cache_entry.CacheState == State:S) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ }
+ }
+
+ action(a_allocate, "a", desc="allocate block") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(L1cache.allocate(address, new Entry));
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ assert(is_valid(cache_entry));
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs
+ tbe.Dirty := cache_entry.Dirty;
+ tbe.Shared := false;
+ }
+
+ action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+ mandatoryQueue_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+ responseToSQC_in.dequeue(clockEdge());
+ }
+
+ action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+ probeNetwork_in.dequeue(clockEdge());
+ }
+
+ action(l_loadDone, "l", desc="local load done") {
+ assert(is_valid(cache_entry));
+ sequencer.readCallback(address, cache_entry.DataBlk,
+ false, MachineType:L1Cache);
+ APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
+ }
+
+ action(xl_loadDone, "xl", desc="remote load done") {
+ peek(responseToSQC_in, ResponseMsg) {
+ assert(is_valid(cache_entry));
+ sequencer.readCallback(address,
+ cache_entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
+ }
+ }
+
+ action(w_writeCache, "w", desc="write data to cache") {
+ peek(responseToSQC_in, ResponseMsg) {
+ assert(is_valid(cache_entry));
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := in_msg.Dirty;
+ }
+ }
+
+ action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+ peek(responseToSQC_in, ResponseMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:StaleNotif;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(wb_data, "wb", desc="write back data") {
+ peek(responseToSQC_in, ResponseMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUData;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Shared) {
+ out_msg.NbReqShared := true;
+ } else {
+ out_msg.NbReqShared := false;
+ }
+ out_msg.State := CoherenceState:Shared; // faux info
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Dirty := false;
+ out_msg.Ntsl := true;
+ out_msg.Hit := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Dirty := false; // only true if sending back data i think
+ out_msg.Hit := false;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ assert(is_valid(cache_entry) || is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.DataBlk := getDataBlock(address);
+ if (is_valid(tbe)) {
+ out_msg.Dirty := tbe.Dirty;
+ } else {
+ out_msg.Dirty := cache_entry.Dirty;
+ }
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+ action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ assert(is_valid(cache_entry) || is_valid(tbe));
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.DataBlk := getDataBlock(address);
+ if (is_valid(tbe)) {
+ out_msg.Dirty := tbe.Dirty;
+ } else {
+ out_msg.Dirty := cache_entry.Dirty;
+ }
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+ action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
+ assert(is_valid(tbe));
+ tbe.Shared := true;
+ }
+
+ action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+ enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
+ probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
+ mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ // Transitions
+
+ // transitions from base
+ transition(I, Fetch, I_S) {TagArrayRead, TagArrayWrite} {
+ a_allocate;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ // simple hit transitions
+ transition(S, Fetch) {TagArrayRead, DataArrayRead} {
+ l_loadDone;
+ p_popMandatoryQueue;
+ }
+
+ // recycles from transients
+ transition({I_S, S_I, I_C}, {Fetch, Repl}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition(S, Repl, S_I) {TagArrayRead} {
+ t_allocateTBE;
+ vc_victim;
+ ic_invCache;
+ }
+
+ // TCC event
+ transition(I_S, TCC_AckS, S) {DataArrayRead, DataArrayWrite} {
+ w_writeCache;
+ xl_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(S_I, TCC_NackWB, I){TagArrayWrite} {
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(S_I, TCC_AckWB, I) {TagArrayWrite} {
+ wb_data;
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(I_C, TCC_AckWB, I){TagArrayWrite} {
+ ss_sendStaleNotification;
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(I_C, TCC_NackWB, I) {TagArrayWrite} {
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ // Probe transitions
+ transition({S, I}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
+ pd_sendProbeResponseData;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, PrbInvData, I_C) {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition({S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition({S}, PrbShrData, S) {DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({I, I_C}, PrbShrData) {TagArrayRead} {
+ prm_sendProbeResponseMiss;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, PrbInv, I_C){
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(I_S, {PrbInv, PrbInvData}) {} {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ a_allocate; // but make sure there is room for incoming data when it arrives
+ pp_popProbeQueue;
+ }
+
+ transition(I_S, PrbShrData) {} {
+ prm_sendProbeResponseMiss;
+ pp_popProbeQueue;
+ }
+
+ transition(S_I, PrbInvData, I_C) {TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(S_I, PrbInv, I_C) {TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(S_I, PrbShrData) {DataArrayRead} {
+ pd_sendProbeResponseData;
+ sf_setSharedFlip;
+ pp_popProbeQueue;
+ }
+}
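
Taken as a whole, the SQC is a read-only instruction cache with just two stable states, I and S; everything else handles in-flight fills, writebacks, and probes. A sketch of its common-case paths as a Python table, not part of this patch (states, events, and action names are taken from the machine above; queue pops omitted):

    SQC_COMMON_CASE = {
        # (state, event):     (next_state, actions)
        ("I",   "Fetch"):     ("I_S", ["a_allocate", "nS_issueRdBlkS"]),
        ("I_S", "TCC_AckS"):  ("S",   ["w_writeCache", "xl_loadDone", "uu_sendUnblock"]),
        ("S",   "Fetch"):     ("S",   ["l_loadDone"]),
        ("S",   "Repl"):      ("S_I", ["t_allocateTBE", "vc_victim", "ic_invCache"]),
        ("S_I", "TCC_AckWB"): ("I",   ["wb_data", "d_deallocateTBE"]),
    }
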
diff --git a/src/mem/protocol/GPU_RfO-TCC.sm b/src/mem/protocol/GPU_RfO-TCC.sm
new file mode 100644
index 000000000..cfddb3f00
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO-TCC.sm
@@ -0,0 +1,1199 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:TCC, "TCC Cache")
+ : CacheMemory * L2cache;
+ WireBuffer * w_reqToTCCDir;
+ WireBuffer * w_respToTCCDir;
+ WireBuffer * w_TCCUnblockToTCCDir;
+ WireBuffer * w_reqToTCC;
+ WireBuffer * w_probeToTCC;
+ WireBuffer * w_respToTCC;
+ int TCC_select_num_bits;
+ Cycles l2_request_latency := 1;
+ Cycles l2_response_latency := 20;
+
+ // To the general response network
+ MessageBuffer * responseFromTCC, network="To", virtual_network="3", vnet_type="response";
+
+ // From the general response network
+ MessageBuffer * responseToTCC, network="From", virtual_network="3", vnet_type="response";
+
+{
+ // EVENTS
+ enumeration(Event, desc="TCC Events") {
+ // Requests coming from the Cores
+ RdBlk, desc="CPU RdBlk event";
+ RdBlkM, desc="CPU RdBlkM event";
+ RdBlkS, desc="CPU RdBlkS event";
+ CtoD, desc="Change to Dirty request";
+ WrVicBlk, desc="L1 Victim (dirty)";
+ WrVicBlkShared, desc="L1 Victim (dirty)";
+ ClVicBlk, desc="L1 Victim (clean)";
+ ClVicBlkShared, desc="L1 Victim (clean)";
+
+ CPUData, desc="WB data from CPU";
+ CPUDataShared, desc="WB data from CPU, NBReqShared 1";
+ StaleWB, desc="Stale WB, No data";
+
+ L2_Repl, desc="L2 Replacement";
+
+ // Probes
+ PrbInvData, desc="Invalidating probe, return dirty data";
+ PrbInv, desc="Invalidating probe, no need to return data";
+ PrbShrData, desc="Downgrading probe, return data";
+
+ // Coming from Memory Controller
+ WBAck, desc="ack from memory";
+
+ CancelWB, desc="Cancel WB from L2";
+ }
+
+ // STATES
+ state_declaration(State, desc="TCC State", default="TCC_State_I") {
+ M, AccessPermission:Read_Write, desc="Modified"; // No other cache has copy, memory stale
+ O, AccessPermission:Read_Only, desc="Owned"; // Correct most recent copy, others may exist in S
+ E, AccessPermission:Read_Write, desc="Exclusive"; // Correct, most recent, and only copy (and == Memory)
+ S, AccessPermission:Read_Only, desc="Shared"; // Correct, most recent. If no one in O, then == Memory
+ I, AccessPermission:Invalid, desc="Invalid";
+
+ I_M, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data";
+ I_O, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data";
+ I_E, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data";
+ I_S, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data";
+ S_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to M";
+ S_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O";
+ S_E, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to E";
+ S_S, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to S";
+ E_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to M";
+ E_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O";
+ E_E, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to E";
+ E_S, AccessPermission:Busy, desc="Shared, received WrVicBlk, sent Ack, waiting for Data";
+ O_M, AccessPermission:Busy, desc="...";
+ O_O, AccessPermission:Busy, desc="...";
+ O_E, AccessPermission:Busy, desc="...";
+ M_M, AccessPermission:Busy, desc="...";
+ M_O, AccessPermission:Busy, desc="...";
+ M_E, AccessPermission:Busy, desc="...";
+ M_S, AccessPermission:Busy, desc="...";
+ D_I, AccessPermission:Invalid, desc="drop WB data on the floor when received";
+ MOD_I, AccessPermission:Busy, desc="drop WB data on the floor, waiting for WBAck from Mem";
+ MO_I, AccessPermission:Busy, desc="M or O, received L2_Repl, waiting for WBAck from Mem";
+ ES_I, AccessPermission:Busy, desc="E or S, received L2_Repl, waiting for WBAck from Mem";
+ I_C, AccessPermission:Invalid, desc="sent cancel, just waiting to receive mem wb ack so nothing gets confused";
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ DataArrayRead, desc="Read the data array";
+ DataArrayWrite, desc="Write the data array";
+ TagArrayRead, desc="Read the tag array";
+ TagArrayWrite, desc="Write the tag array";
+ }
+
+
+ // STRUCTURES
+
+ structure(Entry, desc="...", interface="AbstractCacheEntry") {
+ State CacheState, desc="cache state";
+ bool Dirty, desc="Is the data dirty (different from memory)?";
+ DataBlock DataBlk, desc="Data for the block";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block";
+ bool Dirty, desc="Is the data dirty?";
+ bool Shared, desc="Victim hit by shared probe";
+ MachineID From, desc="Waiting for writeback from...";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs";
+ int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+
+
+ // FUNCTION DEFINITIONS
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+ return static_cast(Entry, "pointer", L2cache.lookup(addr));
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ return getCacheEntry(addr).DataBlk;
+ }
+
+ bool presentOrAvail(Addr addr) {
+ return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if (is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+ }
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return TCC_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return TCC_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(TCC_State_to_permission(state));
+ }
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+
+
+ // OUT PORTS
+ out_port(w_requestNetwork_out, CPURequestMsg, w_reqToTCCDir);
+ out_port(w_TCCResp_out, ResponseMsg, w_respToTCCDir);
+ out_port(responseNetwork_out, ResponseMsg, responseFromTCC);
+ out_port(w_unblockNetwork_out, UnblockMsg, w_TCCUnblockToTCCDir);
+
+ // IN PORTS
+ in_port(TDResponse_in, ResponseMsg, w_respToTCC) {
+ if (TDResponse_in.isReady(clockEdge())) {
+ peek(TDResponse_in, ResponseMsg) {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
+ trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
+ }
+ else {
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ error("Error on TDResponse Type");
+ }
+ }
+ }
+ }
+
+ // Response Network
+ in_port(responseNetwork_in, ResponseMsg, responseToTCC) {
+ if (responseNetwork_in.isReady(clockEdge())) {
+ peek(responseNetwork_in, ResponseMsg) {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:CPUData) {
+ if (in_msg.NbReqShared) {
+ trigger(Event:CPUDataShared, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:CPUData, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
+ trigger(Event:StaleWB, in_msg.addr, cache_entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ error("Error on TDResponse Type");
+ }
+ }
+ }
+ }
+
+ // probe network
+ in_port(probeNetwork_in, TDProbeRequestMsg, w_probeToTCC) {
+ if (probeNetwork_in.isReady(clockEdge())) {
+ peek(probeNetwork_in, TDProbeRequestMsg) {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ if (in_msg.Type == ProbeRequestType:PrbInv) {
+ if (in_msg.ReturnData) {
+ trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+ if (in_msg.ReturnData) {
+ trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Don't think I should get any of these");
+ }
+ }
+ }
+ }
+ }
+
+ // Request Network
+ in_port(requestNetwork_in, CPURequestMsg, w_reqToTCC) {
+ if (requestNetwork_in.isReady(clockEdge())) {
+ peek(requestNetwork_in, CPURequestMsg) {
+ assert(in_msg.Destination.isElement(machineID));
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ if (in_msg.Type == CoherenceRequestType:RdBlk) {
+ trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+ trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+ trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+ if (presentOrAvail(in_msg.addr)) {
+ if (in_msg.Shared) {
+ trigger(Event:ClVicBlkShared, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:ClVicBlk, in_msg.addr, cache_entry, tbe);
+ }
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.addr);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+ if (presentOrAvail(in_msg.addr)) {
+ if (in_msg.Shared) {
+ trigger(Event:WrVicBlkShared, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
+ }
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.addr);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else {
+ requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+ }
+ }
+ }
+
+ // BEGIN ACTIONS
+
+ action(i_invL2, "i", desc="invalidate TCC cache block") {
+ if (is_valid(cache_entry)) {
+ L2cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ action(rm_sendResponseM, "rm", desc="send Modified response") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.State := CoherenceState:Modified;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(rs_sendResponseS, "rs", desc="send Shared response") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.State := CoherenceState:Shared;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+
+ action(r_requestToTD, "r", desc="Miss in L2, pass on") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Shared := false; // unneeded for this request
+ out_msg.MessageSize := in_msg.MessageSize;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ if (is_valid(cache_entry)) {
+ tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
+ tbe.Dirty := cache_entry.Dirty;
+ }
+ tbe.From := machineID;
+ }
+
+ action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(vc_vicClean, "vc", desc="Victimize Clean L2 data") {
+ enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:VicClean;
+ out_msg.Requestor := machineID;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+ action(vd_vicDirty, "vd", desc="Victimize dirty L2 data") {
+ enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:VicDirty;
+ out_msg.Requestor := machineID;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+ action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysWBAck;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+ enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(ph_sendProbeResponseHit, "ph", desc="send probe ack, no data") {
+ enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Dirty := false;
+ out_msg.Hit := true;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pm_sendProbeResponseMiss, "pm", desc="send probe ack, no data") {
+ enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+ enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.DataBlk := cache_entry.DataBlk;
+ //assert(cache_entry.Dirty); Not needed in TCC where TCC can supply clean data
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+ action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") {
+ enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.DataBlk := tbe.DataBlk;
+ //assert(tbe.Dirty);
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.Hit := true;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.State := CoherenceState:NA;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(mc_cancelMemWriteback, "mc", desc="send writeback cancel to memory") {
+ enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:WrCancel;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ }
+
+ action(a_allocateBlock, "a", desc="allocate TCC block") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(L2cache.allocate(address, new Entry));
+ }
+ }
+
+ action(d_writeData, "d", desc="write data to TCC") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (in_msg.Dirty) {
+ cache_entry.Dirty := in_msg.Dirty;
+ }
+ cache_entry.DataBlk := in_msg.DataBlk;
+ DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+ }
+ }
+
+ action(rd_copyDataFromRequest, "rd", desc="copy request data into TCC") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := true;
+ }
+ }
+
+ action(f_setFrom, "f", desc="set who WB is expected to come from") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ tbe.From := in_msg.Requestor;
+ }
+ }
+
+ action(rf_resetFrom, "rf", desc="reset From") {
+ tbe.From := machineID;
+ }
+
+ action(wb_data, "wb", desc="write back data") {
+ enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUData;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Shared) {
+ out_msg.NbReqShared := true;
+ } else {
+ out_msg.NbReqShared := false;
+ }
+ out_msg.State := CoherenceState:Shared; // faux info
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(wt_writeDataToTBE, "wt", desc="write WB data to TBE") {
+ peek(responseNetwork_in, ResponseMsg) {
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.Dirty := in_msg.Dirty;
+ }
+ }
+
+ action(uo_sendUnblockOwner, "uo", desc="state changed to E, M, or O, unblock") {
+ enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ out_msg.currentOwner := true;
+ out_msg.valid := true;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(us_sendUnblockSharer, "us", desc="state changed to S, unblock") {
+ enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ out_msg.currentOwner := false;
+ out_msg.valid := true;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(un_sendUnblockNotValid, "un", desc="state changed to I, unblock") {
+ enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ out_msg.currentOwner := false;
+ out_msg.valid := false;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+ L2cache.setMRU(address);
+ }
+
+ action(p_popRequestQueue, "p", desc="pop request queue") {
+ requestNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="pop response queue") {
+ responseNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pn_popTDResponseQueue, "pn", desc="pop TD response queue") {
+ TDResponse_in.dequeue(clockEdge());
+ }
+
+ action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+ probeNetwork_in.dequeue(clockEdge());
+ }
+
+ action(zz_recycleRequestQueue, "\z", desc="recycle request queue") {
+ requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+
+ // END ACTIONS
+
+ // BEGIN TRANSITIONS
+
+ // transitions from base
+
+ transition({I, I_C}, {RdBlk, RdBlkS, RdBlkM, CtoD}){TagArrayRead} {
+ // TCCdir already knows that the block is not here. This is to allocate and get the block.
+ r_requestToTD;
+ p_popRequestQueue;
+ }
+
+// check
+ transition({M, O}, RdBlk, O){TagArrayRead, TagArrayWrite} {
+ rs_sendResponseS;
+ ut_updateTag;
+ // detect 2nd chancing
+ p_popRequestQueue;
+ }
+
+//check
+ transition({E, S}, RdBlk, S){TagArrayRead, TagArrayWrite} {
+ rs_sendResponseS;
+ ut_updateTag;
+ // detect 2nd chancing
+ p_popRequestQueue;
+ }
+
+// check
+ transition({M, O}, RdBlkS, O){TagArrayRead, TagArrayWrite} {
+ rs_sendResponseS;
+ ut_updateTag;
+ // detect 2nd chance sharing
+ p_popRequestQueue;
+ }
+
+//check
+ transition({E, S}, RdBlkS, S){TagArrayRead, TagArrayWrite} {
+ rs_sendResponseS;
+ ut_updateTag;
+ // detect 2nd chance sharing
+ p_popRequestQueue;
+ }
+
+// check
+ transition(M, RdBlkM, I){TagArrayRead, TagArrayWrite} {
+ rm_sendResponseM;
+ i_invL2;
+ p_popRequestQueue;
+ }
+
+ //check
+ transition(E, RdBlkM, I){TagArrayRead, TagArrayWrite} {
+ rm_sendResponseM;
+ i_invL2;
+ p_popRequestQueue;
+ }
+
+// check
+ transition({I}, WrVicBlk, I_M){TagArrayRead} {
+ a_allocateBlock;
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(I_C, {WrVicBlk, WrVicBlkShared, ClVicBlk, ClVicBlkShared}) {
+ zz_recycleRequestQueue;
+ }
+
+//check
+ transition({I}, WrVicBlkShared, I_O) {TagArrayRead} {
+ a_allocateBlock;
+ t_allocateTBE;
+ f_setFrom;
+// rd_copyDataFromRequest;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+//check
+ transition(S, WrVicBlkShared, S_O){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(S, WrVicBlk, S_S){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(E, WrVicBlk, E_E){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(E, WrVicBlkShared, E_E){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(O, WrVicBlk, O_O){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(O, WrVicBlkShared, O_O){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(M, WrVicBlk, M_M){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(M, WrVicBlkShared, M_O){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+//check
+ transition({I}, ClVicBlk, I_E){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ a_allocateBlock;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition({I}, ClVicBlkShared, I_S){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ a_allocateBlock;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+//check
+ transition(S, ClVicBlkShared, S_S){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(E, ClVicBlk, E_E){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(E, ClVicBlkShared, E_S){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(O, ClVicBlk, O_O){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// check. Original L3 had it going from O to O_S. Something can go from O to S only on writeback.
+ transition(O, ClVicBlkShared, O_O){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(M, ClVicBlk, M_E){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(M, ClVicBlkShared, M_S){TagArrayRead} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+
+ transition({MO_I}, {RdBlk, RdBlkS, RdBlkM, CtoD}) {
+ a_allocateBlock;
+ t_allocateTBE;
+ f_setFrom;
+ r_requestToTD;
+ p_popRequestQueue;
+ }
+
+ transition(MO_I, {WrVicBlkShared, WrVicBlk, ClVicBlk, ClVicBlkShared}, MOD_I) {
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(I_M, CPUData, M){TagArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(I_M, CPUDataShared, O){TagArrayWrite, DataArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(I_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E, CPUData, E){TagArrayWrite, DataArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E, CPUDataShared, S){TagArrayWrite, DataArrayWrite} {
+ us_sendUnblockSharer;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(I_S, {CPUData, CPUDataShared}, S){TagArrayWrite, DataArrayWrite} {
+ us_sendUnblockSharer;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(S_M, CPUDataShared, O){TagArrayWrite, DataArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition(S_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition(S_E, CPUDataShared, S){TagArrayWrite, DataArrayWrite} {
+ us_sendUnblockSharer;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition(S_S, {CPUData, CPUDataShared}, S){TagArrayWrite, DataArrayWrite} {
+ us_sendUnblockSharer;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition(O_E, CPUDataShared, O){TagArrayWrite, DataArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition(O_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition({D_I}, {CPUData, CPUDataShared}, I){TagArrayWrite} {
+ un_sendUnblockNotValid;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(MOD_I, {CPUData, CPUDataShared}, MO_I) {
+ un_sendUnblockNotValid;
+ rf_resetFrom;
+ pr_popResponseQueue;
+ }
+
+ transition({O,S,I}, CPUData) {
+ pr_popResponseQueue;
+ }
+
+ transition({M, O}, L2_Repl, MO_I){TagArrayRead, DataArrayRead} {
+ t_allocateTBE;
+ vd_vicDirty;
+ i_invL2;
+ }
+
+  transition({E, S}, L2_Repl, ES_I){TagArrayRead, DataArrayRead} {
+ t_allocateTBE;
+ vc_vicClean;
+ i_invL2;
+ }
+
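+// Replacements cannot proceed while a transaction is in flight; recycle until stable.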
+ transition({I_M, I_O, S_M, S_O, E_M, E_O}, L2_Repl) {
+ zz_recycleRequestQueue;
+ }
+
+ transition({O_M, O_O, O_E, M_M, M_O, M_E, M_S}, L2_Repl) {
+ zz_recycleRequestQueue;
+ }
+
+ transition({I_E, I_S, S_E, S_S, E_E, E_S}, L2_Repl) {
+ zz_recycleRequestQueue;
+ }
+
+ transition({M, O}, PrbInvData, I){TagArrayRead, TagArrayWrite} {
+ pd_sendProbeResponseData;
+ i_invL2;
+ pp_popProbeQueue;
+ }
+
+ transition(I, PrbInvData){TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition({E, S}, PrbInvData, I){TagArrayRead, TagArrayWrite} {
+ pd_sendProbeResponseData;
+ i_invL2;
+ pp_popProbeQueue;
+ }
+
+ transition({M, O, E, S, I}, PrbInv, I){TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ i_invL2; // nothing will happen in I
+ pp_popProbeQueue;
+ }
+
+ transition({M, O}, PrbShrData, O){TagArrayRead, TagArrayWrite} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({E, S}, PrbShrData, S){TagArrayRead, TagArrayWrite} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition(I, PrbShrData){TagArrayRead} {
+ pm_sendProbeResponseMiss;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbInvData, I_C) {
+ pdt_sendProbeResponseDataFromTBE;
+ pp_popProbeQueue;
+ }
+
+ transition(ES_I, PrbInvData, I_C) {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition({ES_I,MO_I}, PrbInv, I_C) {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition({ES_I, MO_I}, PrbShrData) {
+ pdt_sendProbeResponseDataFromTBE;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, {PrbInvData, PrbInv}) {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, PrbShrData) {
+ pm_sendProbeResponseMiss;
+ pp_popProbeQueue;
+ }
+
+ transition(MOD_I, WBAck, D_I) {
+ pn_popTDResponseQueue;
+ }
+
+ transition(MO_I, WBAck, I){TagArrayWrite} {
+ dt_deallocateTBE;
+ pn_popTDResponseQueue;
+ }
+
+ // this can only be a spurious CPUData from a shared block.
+ transition(MO_I, CPUData) {
+ pr_popResponseQueue;
+ }
+
+ transition(ES_I, WBAck, I){TagArrayWrite} {
+ dt_deallocateTBE;
+ pn_popTDResponseQueue;
+ }
+
+ transition(I_C, {WBAck}, I){TagArrayWrite} {
+ dt_deallocateTBE;
+ pn_popTDResponseQueue;
+ }
+
+ transition({I_M, I_O, I_E, I_S}, StaleWB, I){TagArrayWrite} {
+ un_sendUnblockNotValid;
+ dt_deallocateTBE;
+ i_invL2;
+ pr_popResponseQueue;
+ }
+
+ transition({S_S, S_O, S_M, S_E}, StaleWB, S){TagArrayWrite} {
+ us_sendUnblockSharer;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition({E_M, E_O, E_E, E_S}, StaleWB, E){TagArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition({O_M, O_O, O_E}, StaleWB, O){TagArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition({M_M, M_O, M_E, M_S}, StaleWB, M){TagArrayWrite} {
+ uo_sendUnblockOwner;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+  transition(D_I, StaleWB, I){TagArrayWrite} {
+ un_sendUnblockNotValid;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(MOD_I, StaleWB, MO_I) {
+ un_sendUnblockNotValid;
+ rf_resetFrom;
+ pr_popResponseQueue;
+ }
+
+}
diff --git a/src/mem/protocol/GPU_RfO-TCCdir.sm b/src/mem/protocol/GPU_RfO-TCCdir.sm
new file mode 100644
index 000000000..8f58d6ebb
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO-TCCdir.sm
@@ -0,0 +1,2672 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Mithuna Thottethodi
+ */
+
+machine(MachineType:TCCdir, "AMD read-for-ownership directory for TCC (aka GPU L2)")
+: CacheMemory * directory;
+ // Convention: wire buffers are prefixed with "w_" for clarity
+ WireBuffer * w_reqToTCCDir;
+ WireBuffer * w_respToTCCDir;
+ WireBuffer * w_TCCUnblockToTCCDir;
+ WireBuffer * w_reqToTCC;
+ WireBuffer * w_probeToTCC;
+ WireBuffer * w_respToTCC;
+ int TCC_select_num_bits;
+ Cycles response_latency := 5;
+ Cycles directory_latency := 6;
+ Cycles issue_latency := 120;
+
+ // From the TCPs or SQCs
+ MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request";
+ MessageBuffer * responseFromTCP, network="From", virtual_network="3", vnet_type="response";
+ MessageBuffer * unblockFromTCP, network="From", virtual_network="5", vnet_type="unblock";
+
+ // To the Cores. TCC deals only with TCPs/SQCs. CP cores do not communicate directly with TCC.
+ MessageBuffer * probeToCore, network="To", virtual_network="1", vnet_type="request";
+ MessageBuffer * responseToCore, network="To", virtual_network="3", vnet_type="response";
+
+ // From the NB
+ MessageBuffer * probeFromNB, network="From", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseFromNB, network="From", virtual_network="2", vnet_type="response";
+ // To the NB
+ MessageBuffer * requestToNB, network="To", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseToNB, network="To", virtual_network="2", vnet_type="response";
+ MessageBuffer * unblockToNB, network="To", virtual_network="4", vnet_type="unblock";
+
+ MessageBuffer * triggerQueue, random="false";
+{
+ // STATES
+ state_declaration(State, desc="Directory states", default="TCCdir_State_I") {
+ // Base states
+ I, AccessPermission:Invalid, desc="Invalid";
+ S, AccessPermission:Invalid, desc="Shared";
+    E, AccessPermission:Invalid, desc="Exclusive";
+ O, AccessPermission:Invalid, desc="Owner";
+ M, AccessPermission:Invalid, desc="Modified";
+
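+    // CP_* states: blocked collecting probe acks; the suffix names the state
+    // entered once all acks (and any required data) have been sent.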
+ CP_I, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to invalid";
+ B_I, AccessPermission:Invalid, desc="Blocked, need not send data after acks are in, going to invalid";
+ CP_O, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to owned";
+ CP_S, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to shared";
+ CP_OM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to O_M";
+ CP_SM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to S_M";
+ CP_ISM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to I_M";
+ CP_IOM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to I_M";
+ CP_OSIW, AccessPermission:Invalid, desc="Blocked, must send data after acks+CancelWB are in, going to I_C";
+
+
+ // Transient states and busy states used for handling side (TCC-facing) interactions
+ BW_S, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
+ BW_E, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
+ BW_O, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
+ BW_M, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
+
+ // Transient states and busy states used for handling upward (TCP-facing) interactions
+ I_M, AccessPermission:Invalid, desc="Invalid, issued RdBlkM, have not seen response yet";
+ I_ES, AccessPermission:Invalid, desc="Invalid, issued RdBlk, have not seen response yet";
+ I_S, AccessPermission:Invalid, desc="Invalid, issued RdBlkS, have not seen response yet";
+ BBS_S, AccessPermission:Invalid, desc="Blocked, going from S to S";
+ BBO_O, AccessPermission:Invalid, desc="Blocked, going from O to O";
+ BBM_M, AccessPermission:Invalid, desc="Blocked, going from M to M, waiting for data to forward";
+ BBM_O, AccessPermission:Invalid, desc="Blocked, going from M to O, waiting for data to forward";
+ BB_M, AccessPermission:Invalid, desc="Blocked, going from M to M, waiting for unblock";
+ BB_O, AccessPermission:Invalid, desc="Blocked, going from M to O, waiting for unblock";
+ BB_OO, AccessPermission:Invalid, desc="Blocked, going from O to O (adding sharers), waiting for unblock";
+ BB_S, AccessPermission:Invalid, desc="Blocked, going to S, waiting for (possible multiple) unblock(s)";
+ BBS_M, AccessPermission:Invalid, desc="Blocked, going from S or O to M";
+ BBO_M, AccessPermission:Invalid, desc="Blocked, going from S or O to M";
+ BBS_UM, AccessPermission:Invalid, desc="Blocked, going from S or O to M via upgrade";
+ BBO_UM, AccessPermission:Invalid, desc="Blocked, going from S or O to M via upgrade";
+ S_M, AccessPermission:Invalid, desc="Shared, issued CtoD, have not seen response yet";
+    O_M, AccessPermission:Invalid, desc="Owned, issued CtoD, have not seen response yet";
+
+ //
+ BBB_S, AccessPermission:Invalid, desc="Blocked, going to S after core unblock";
+ BBB_M, AccessPermission:Invalid, desc="Blocked, going to M after core unblock";
+ BBB_E, AccessPermission:Invalid, desc="Blocked, going to E after core unblock";
+
+ VES_I, AccessPermission:Invalid, desc="TCC replacement, waiting for clean WB ack";
+ VM_I, AccessPermission:Invalid, desc="TCC replacement, waiting for dirty WB ack";
+ VO_I, AccessPermission:Invalid, desc="TCC replacement, waiting for dirty WB ack";
+ VO_S, AccessPermission:Invalid, desc="TCC owner replacement, waiting for dirty WB ack";
+
+ ES_I, AccessPermission:Invalid, desc="L1 replacement, waiting for clean WB ack";
+ MO_I, AccessPermission:Invalid, desc="L1 replacement, waiting for dirty WB ack";
+
+ I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from NB for canceled WB";
+ I_W, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from NB; canceled WB raced with directory invalidation";
+
+ // Recall States
+ BRWD_I, AccessPermission:Invalid, desc="Recalling, waiting for WBAck and Probe Data responses";
+ BRW_I, AccessPermission:Read_Write, desc="Recalling, waiting for WBAck";
+ BRD_I, AccessPermission:Invalid, desc="Recalling, waiting for Probe Data responses";
+
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ DataArrayRead, desc="Read the data array";
+ DataArrayWrite, desc="Write the data array";
+    TagArrayRead, desc="Read the tag array";
+    TagArrayWrite, desc="Write the tag array";
+ }
+
+
+
+ // EVENTS
+ enumeration(Event, desc="TCC Directory Events") {
+    // Upward facing events (TCCdir behaves toward TCP/SQC and TCC the way NBdir behaves toward TCP/SQC and L3)
+
+ // Directory Recall
+ Recall, desc="directory cache is full";
+ // CPU requests
+ CPUWrite, desc="Initial req from core, sent to TCC";
+ NoCPUWrite, desc="Initial req from core, but non-exclusive clean data; can be discarded";
+ CPUWriteCancel, desc="Initial req from core, sent to TCC";
+
+ // Requests from the TCPs
+ RdBlk, desc="RdBlk event";
+ RdBlkM, desc="RdBlkM event";
+ RdBlkS, desc="RdBlkS event";
+ CtoD, desc="Change to Dirty request";
+
+ // TCC writebacks
+ VicDirty, desc="...";
+ VicDirtyLast, desc="...";
+ VicClean, desc="...";
+ NoVic, desc="...";
+ StaleVic, desc="...";
+ CancelWB, desc="TCC got invalidating probe, canceled WB";
+
+ // Probe Responses from TCP/SQCs
+ CPUPrbResp, desc="Probe response from TCP/SQC";
+ TCCPrbResp, desc="Probe response from TCC";
+
+ ProbeAcksComplete, desc="All acks received";
+ ProbeAcksCompleteReissue, desc="All acks received, changing CtoD to reissue";
+
+ CoreUnblock, desc="unblock from TCP/SQC";
+ LastCoreUnblock, desc="Last unblock from TCP/SQC";
+ TCCUnblock, desc="unblock from TCC (current owner)";
+ TCCUnblock_Sharer, desc="unblock from TCC (a sharer, not owner)";
+    TCCUnblock_NotValid, desc="unblock from TCC (not valid; caused by stale writebacks)";
+
+ // Downward facing events
+
+ // NB initiated
+ NB_AckS, desc="NB Ack to TCC Request";
+ NB_AckE, desc="NB Ack to TCC Request";
+ NB_AckM, desc="NB Ack to TCC Request";
+ NB_AckCtoD, desc="NB Ack to TCC Request";
+ NB_AckWB, desc="NB Ack for clean WB";
+
+
+ // Incoming Probes from NB
+ PrbInvData, desc="Invalidating probe, return dirty data";
+ PrbInv, desc="Invalidating probe, no need to return data";
+ PrbShrData, desc="Downgrading probe, return data";
+ }
+
+
+ // TYPES
+
+ // Entry for directory
+ structure(Entry, desc="...", interface='AbstractCacheEntry') {
+ State CacheState, desc="Cache state (Cache of directory entries)";
+ DataBlock DataBlk, desc="data for the block";
+ NetDest Sharers, desc="Sharers for this block";
+ NetDest Owner, desc="Owner of this block";
+ NetDest MergedSharers, desc="Read sharers who are merged on a request";
+ int WaitingUnblocks, desc="Number of acks we're waiting for";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="DataBlk";
+ bool Dirty, desc="Is the data dirty?";
+ MachineID Requestor, desc="requestor";
+ int NumPendingAcks, desc="num acks expected";
+ MachineID OriginalRequestor, desc="Original Requestor";
+    MachineID UntransferredOwner, desc="Untransferred owner for an upgrade transaction";
+    bool UntransferredOwnerExists, desc="true if an untransferred owner exists for an upgrade transaction";
+ bool Cached, desc="data hit in Cache";
+ bool Shared, desc="victim hit by shared probe";
+ bool Upgrade, desc="An upgrade request in progress";
+ bool CtoD, desc="Saved sysack info";
+ CoherenceState CohState, desc="Saved sysack info";
+ MessageSizeType MessageSize, desc="Saved sysack info";
+ MachineID Sender, desc="sender";
+ }
+
+ structure(TBETable, external = "yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ // ** OBJECTS **
+ TBETable TBEs, template="<TCCdir_TBE>", constructor="m_number_of_TBEs";
+ int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+ NetDest TCC_dir_subtree;
+ NetDest temp;
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+
+
+ bool presentOrAvail(Addr addr) {
+ return directory.isTagPresent(addr) || directory.cacheAvail(addr);
+ }
+
+ Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+ return static_cast(Entry, "pointer", directory.lookup(addr));
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return tbe.DataBlk;
+ } else {
+      assert(false); // a valid TBE is expected whenever this is called
+      return getCacheEntry(addr).DataBlk; // unreachable fallback to satisfy the return path
+ }
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if(is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(TCCdir_State_to_permission(state));
+ }
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return TCCdir_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return TCCdir_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+
+ if (state == State:S) {
+ assert(cache_entry.Owner.count() == 0);
+ }
+
+ if (state == State:O) {
+ assert(cache_entry.Owner.count() == 1);
+ assert(cache_entry.Sharers.isSuperset(cache_entry.Owner) == false);
+ }
+
+ if (state == State:M) {
+ assert(cache_entry.Owner.count() == 1);
+ assert(cache_entry.Sharers.count() == 0);
+ }
+
+ if (state == State:E) {
+ assert(cache_entry.Owner.count() == 0);
+ assert(cache_entry.Sharers.count() == 1);
+ }
+ }
+ }
+
+
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ directory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ directory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ directory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ directory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ return directory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ return directory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ return directory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ return directory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+ // ** OUT_PORTS **
+
+ // Three classes of ports
+ // Class 1: downward facing network links to NB
+ out_port(requestToNB_out, CPURequestMsg, requestToNB);
+ out_port(responseToNB_out, ResponseMsg, responseToNB);
+ out_port(unblockToNB_out, UnblockMsg, unblockToNB);
+
+
+ // Class 2: upward facing ports to GPU cores
+ out_port(probeToCore_out, TDProbeRequestMsg, probeToCore);
+ out_port(responseToCore_out, ResponseMsg, responseToCore);
+
+ // Class 3: sideward facing ports (on "wirebuffer" links) to TCC
+ out_port(w_requestTCC_out, CPURequestMsg, w_reqToTCC);
+ out_port(w_probeTCC_out, NBProbeRequestMsg, w_probeToTCC);
+ out_port(w_respTCC_out, ResponseMsg, w_respToTCC);
+
+
+ // local trigger port
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+
+ //
+ // request queue going to NB
+ //
+
+ // ** IN_PORTS **
+
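+  // Higher-ranked in_ports are polled first, so completion traffic (triggers,
+  // unblocks, probe responses) is serviced ahead of new core requests; this
+  // ordering helps avoid protocol deadlock.
+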
+ // Trigger Queue
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=8) {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ assert(is_valid(tbe));
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if ((in_msg.Type == TriggerType:AcksComplete) && (tbe.Upgrade == false)) {
+ trigger(Event:ProbeAcksComplete, in_msg.addr, cache_entry, tbe);
+ } else if ((in_msg.Type == TriggerType:AcksComplete) && (tbe.Upgrade == true)) {
+ trigger(Event:ProbeAcksCompleteReissue, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+ // Unblock Networks (TCCdir can receive unblocks from TCC, TCPs)
+ // Port on first (of three) wire buffers from TCC
+ in_port(w_TCCUnblock_in, UnblockMsg, w_TCCUnblockToTCCDir, rank=7) {
+ if (w_TCCUnblock_in.isReady(clockEdge())) {
+ peek(w_TCCUnblock_in, UnblockMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.currentOwner) {
+ trigger(Event:TCCUnblock, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.valid) {
+ trigger(Event:TCCUnblock_Sharer, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:TCCUnblock_NotValid, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+ in_port(unblockNetwork_in, UnblockMsg, unblockFromTCP, rank=6) {
+ if (unblockNetwork_in.isReady(clockEdge())) {
+ peek(unblockNetwork_in, UnblockMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (cache_entry.WaitingUnblocks == 1) {
+          trigger(Event:LastCoreUnblock, in_msg.addr, cache_entry, tbe);
+        } else {
+ trigger(Event:CoreUnblock, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+
+ //Responses from TCC, and Cores
+ // Port on second (of three) wire buffers from TCC
+ in_port(w_TCCResponse_in, ResponseMsg, w_respToTCCDir, rank=5) {
+ if (w_TCCResponse_in.isReady(clockEdge())) {
+ peek(w_TCCResponse_in, ResponseMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+ trigger(Event:TCCPrbResp, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+ in_port(responseNetwork_in, ResponseMsg, responseFromTCP, rank=4) {
+ if (responseNetwork_in.isReady(clockEdge())) {
+ peek(responseNetwork_in, ResponseMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+ trigger(Event:CPUPrbResp, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+
+ // Port on third (of three) wire buffers from TCC
+ in_port(w_TCCRequest_in, CPURequestMsg, w_reqToTCCDir, rank=3) {
+ if(w_TCCRequest_in.isReady(clockEdge())) {
+ peek(w_TCCRequest_in, CPURequestMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.Type == CoherenceRequestType:WrCancel) {
+ trigger(Event:CancelWB, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+ if (is_valid(cache_entry) && cache_entry.Owner.isElement(in_msg.Requestor)) {
+ // if modified, or owner with no other sharers
+ if ((cache_entry.CacheState == State:M) || (cache_entry.Sharers.count() == 0)) {
+ assert(cache_entry.Owner.count()==1);
+ trigger(Event:VicDirtyLast, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:VicDirty, in_msg.addr, cache_entry, tbe);
+ }
+ } else {
+ trigger(Event:StaleVic, in_msg.addr, cache_entry, tbe);
+ }
+ } else {
+ if (in_msg.Type == CoherenceRequestType:VicClean) {
+ if (is_valid(cache_entry) && cache_entry.Sharers.isElement(in_msg.Requestor)) {
+ if (cache_entry.Sharers.count() == 1) {
+ // Last copy, victimize to L3
+ trigger(Event:VicClean, in_msg.addr, cache_entry, tbe);
+ } else {
+              // Not the last copy; no need to victimize.
+ // remove sharer from sharer list
+ assert(cache_entry.Sharers.count() > 1);
+ trigger(Event:NoVic, in_msg.addr, cache_entry, tbe);
+ }
+ } else {
+ trigger(Event:StaleVic, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ in_port(responseFromNB_in, ResponseMsg, responseFromNB, rank=2) {
+ if (responseFromNB_in.isReady(clockEdge())) {
+ peek(responseFromNB_in, ResponseMsg, block_on="addr") {
+
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+ if (in_msg.State == CoherenceState:Modified) {
+ if (in_msg.CtoD) {
+ trigger(Event:NB_AckCtoD, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.State == CoherenceState:Shared) {
+ trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.State == CoherenceState:Exclusive) {
+ trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+ trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unexpected Response Message to Core");
+ }
+ }
+ }
+ }
+
+ // Finally handling incoming requests (from TCP) and probes (from NB).
+
+ in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB, rank=1) {
+ if (probeNetwork_in.isReady(clockEdge())) {
+ peek(probeNetwork_in, NBProbeRequestMsg) {
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ DPRINTF(RubySlicc, "machineID: %s\n", machineID);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == ProbeRequestType:PrbInv) {
+ if (in_msg.ReturnData) {
+ trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+ assert(in_msg.ReturnData);
+ trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+
+ in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) {
+ if (coreRequestNetwork_in.isReady(clockEdge())) {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (presentOrAvail(in_msg.addr)) {
+ if (in_msg.Type == CoherenceRequestType:VicDirty) {
+ trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+ if (is_valid(cache_entry) && cache_entry.Owner.isElement(in_msg.Requestor)) {
+ trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+          } else if (is_valid(cache_entry) && (cache_entry.Sharers.count() + cache_entry.Owner.count()) > 1) {
+ trigger(Event:NoCPUWrite, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
+ trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+ trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+ trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:WrCancel) {
+ trigger(Event:CPUWriteCancel, in_msg.addr, cache_entry, tbe);
+ }
+ } else {
+ // All requests require a directory entry
+ Addr victim := directory.cacheProbe(in_msg.addr);
+ trigger(Event:Recall, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ }
+ }
+ }
+
+
+
+
+ // Actions
+
+ //Downward facing actions
+
+ action(c_clearOwner, "c", desc="Clear the owner field") {
+ cache_entry.Owner.clear();
+ }
+
+ action(rS_removeRequesterFromSharers, "rS", desc="Remove unblocker from sharer list") {
+ peek(unblockNetwork_in, UnblockMsg) {
+ cache_entry.Sharers.remove(in_msg.Sender);
+ }
+ }
+
+ action(rT_removeTCCFromSharers, "rT", desc="Remove TCC from sharer list") {
+ peek(w_TCCRequest_in, CPURequestMsg) {
+ cache_entry.Sharers.remove(in_msg.Requestor);
+ }
+ }
+
+ action(rO_removeOriginalRequestorFromSharers, "rO", desc="Remove replacing core from sharer list") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ cache_entry.Sharers.remove(in_msg.Requestor);
+ }
+ }
+
+ action(rC_removeCoreFromSharers, "rC", desc="Remove replacing core from sharer list") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ cache_entry.Sharers.remove(in_msg.Requestor);
+ }
+ }
+
+  action(rCo_removeCoreFromOwner, "rCo", desc="Remove replacing core from owner list") {
+    // Note that in some cases this action will try to remove a stale owner
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ cache_entry.Owner.remove(in_msg.Requestor);
+ }
+ }
+
+ action(rR_removeResponderFromSharers, "rR", desc="Remove responder from sharer list") {
+ peek(responseNetwork_in, ResponseMsg) {
+ cache_entry.Sharers.remove(in_msg.Sender);
+ }
+ }
+
+ action(nC_sendNullWBAckToCore, "nC", desc = "send a null WB Ack to release core") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(responseToCore_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysWBNack;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := in_msg.MessageSize;
+ }
+ }
+ }
+
+ action(nT_sendNullWBAckToTCC, "nT", desc = "send a null WB Ack to release TCC") {
+ peek(w_TCCRequest_in, CPURequestMsg) {
+ enqueue(w_respTCC_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysWBAck;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := in_msg.MessageSize;
+ }
+ }
+ }
+
+ action(eto_moveExSharerToOwner, "eto", desc="move the current exclusive sharer to owner") {
+ assert(cache_entry.Sharers.count() == 1);
+ assert(cache_entry.Owner.count() == 0);
+ cache_entry.Owner := cache_entry.Sharers;
+ cache_entry.Sharers.clear();
+ APPEND_TRANSITION_COMMENT(" new owner ");
+ APPEND_TRANSITION_COMMENT(cache_entry.Owner);
+ }
+
+ action(aT_addTCCToSharers, "aT", desc="Add TCC to sharer list") {
+ peek(w_TCCUnblock_in, UnblockMsg) {
+ cache_entry.Sharers.add(in_msg.Sender);
+ }
+ }
+
+ action(as_addToSharers, "as", desc="Add unblocker to sharer list") {
+ peek(unblockNetwork_in, UnblockMsg) {
+ cache_entry.Sharers.add(in_msg.Sender);
+ }
+ }
+
+ action(c_moveOwnerToSharer, "cc", desc="Move owner to sharers") {
+ cache_entry.Sharers.addNetDest(cache_entry.Owner);
+ cache_entry.Owner.clear();
+ }
+
+ action(cc_clearSharers, "\c", desc="Clear the sharers field") {
+ cache_entry.Sharers.clear();
+ }
+
+ action(e_ownerIsUnblocker, "e", desc="The owner is now the unblocker") {
+ peek(unblockNetwork_in, UnblockMsg) {
+ cache_entry.Owner.clear();
+ cache_entry.Owner.add(in_msg.Sender);
+ APPEND_TRANSITION_COMMENT(" tcp_ub owner ");
+ APPEND_TRANSITION_COMMENT(cache_entry.Owner);
+ }
+ }
+
+ action(eT_ownerIsUnblocker, "eT", desc="TCC (unblocker) is now owner") {
+ peek(w_TCCUnblock_in, UnblockMsg) {
+ cache_entry.Owner.clear();
+ cache_entry.Owner.add(in_msg.Sender);
+ APPEND_TRANSITION_COMMENT(" tcc_ub owner ");
+ APPEND_TRANSITION_COMMENT(cache_entry.Owner);
+ }
+ }
+
+ action(ctr_copyTCCResponseToTBE, "ctr", desc="Copy TCC probe response data to TBE") {
+ peek(w_TCCResponse_in, ResponseMsg) {
+ // Overwrite data if tbe does not hold dirty data. Stop once it is dirty.
+ if(tbe.Dirty == false) {
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.Dirty := in_msg.Dirty;
+ tbe.Sender := in_msg.Sender;
+ }
+ DPRINTF(RubySlicc, "%s\n", (tbe.DataBlk));
+ }
+ }
+
+ action(ccr_copyCoreResponseToTBE, "ccr", desc="Copy core probe response data to TBE") {
+ peek(responseNetwork_in, ResponseMsg) {
+ // Overwrite data if tbe does not hold dirty data. Stop once it is dirty.
+ if(tbe.Dirty == false) {
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.Dirty := in_msg.Dirty;
+
+ if(tbe.Sender == machineID) {
+ tbe.Sender := in_msg.Sender;
+ }
+ }
+ DPRINTF(RubySlicc, "%s\n", (tbe.DataBlk));
+ }
+ }
+
+ action(cd_clearDirtyBitTBE, "cd", desc="Clear Dirty bit in TBE") {
+ tbe.Dirty := false;
+ }
+
+ action(n_issueRdBlk, "n-", desc="Issue RdBlk") {
+ enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlk;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ }
+
+ action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+ enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlkS;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ }
+
+ action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
+ enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlkM;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ }
+
+ action(rU_rememberUpgrade, "rU", desc="Remember that this was an upgrade") {
+ tbe.Upgrade := true;
+ }
+
+ action(ruo_rememberUntransferredOwner, "ruo", desc="Remember the untransferred owner") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if(in_msg.UntransferredOwner == true) {
+ tbe.UntransferredOwner := in_msg.Sender;
+ tbe.UntransferredOwnerExists := true;
+ }
+ DPRINTF(RubySlicc, "%s\n", (in_msg));
+ }
+ }
+
+ action(ruoT_rememberUntransferredOwnerTCC, "ruoT", desc="Remember the untransferred owner") {
+ peek(w_TCCResponse_in, ResponseMsg) {
+ if(in_msg.UntransferredOwner == true) {
+ tbe.UntransferredOwner := in_msg.Sender;
+ tbe.UntransferredOwnerExists := true;
+ }
+ DPRINTF(RubySlicc, "%s\n", (in_msg));
+ }
+ }
+
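+  // Victimization to the NB: VicDirty covers M/O data (Shared is set when other
+  // copies may remain), VicClean covers E/S data.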
+ action(vd_victim, "vd", desc="Victimize M/O Data") {
+ enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicDirty;
+ if (cache_entry.CacheState == State:O) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ out_msg.Dirty := true;
+ }
+ }
+
+ action(vc_victim, "vc", desc="Victimize E/S Data") {
+ enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicClean;
+ if (cache_entry.CacheState == State:S) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ out_msg.Dirty := false;
+ }
+ }
+
+
+ action(sT_sendRequestToTCC, "sT", desc="send request to TCC") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(w_requestTCC_out, CPURequestMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Shared := in_msg.Shared;
+ out_msg.MessageSize := in_msg.MessageSize;
+ }
+ APPEND_TRANSITION_COMMENT(" requestor ");
+ APPEND_TRANSITION_COMMENT(in_msg.Requestor);
+
+ }
+ }
+
+
+ action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") {
+ MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+
+ temp := cache_entry.Sharers;
+ temp.addNetDest(cache_entry.Owner);
+ if (temp.isElement(tcc)) {
+ temp.remove(tcc);
+ }
+ if (temp.count() > 0) {
+ enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbDowngrade;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination := temp;
+ tbe.NumPendingAcks := temp.count();
+ if(cache_entry.CacheState == State:M) {
+ assert(tbe.NumPendingAcks == 1);
+ }
+ DPRINTF(RubySlicc, "%s\n", (out_msg));
+ }
+ }
+ }
+
+ action(ls2_probeShrL2Data, "ls2", desc="local probe downgrade L2, return data") {
+ MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+ if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
+ enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbDowngrade;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.add(tcc);
+ tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+
+ }
+ }
+ }
+
+ action(s2_probeShrL2Data, "s2", desc="probe shared L2, return data") {
+ MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+ if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
+ enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbDowngrade;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.add(tcc);
+ tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+
+ }
+ }
+ }
+
+ action(ldc_probeInvCoreData, "ldc", desc="local probe to inv cores, return data") {
+ MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+      NetDest dest := cache_entry.Sharers;
+      dest.addNetDest(cache_entry.Owner);
+      if (dest.isElement(tcc)) {
+ dest.remove(tcc);
+ }
+ dest.remove(in_msg.Requestor);
+ tbe.NumPendingAcks := dest.count();
+ if (dest.count()>0){
+ enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+
+ out_msg.Destination.addNetDest(dest);
+ if(cache_entry.CacheState == State:M) {
+ assert(tbe.NumPendingAcks == 1);
+ }
+
+ DPRINTF(RubySlicc, "%s\n", (out_msg));
+ }
+ }
+ }
+ }
+
+ action(ld2_probeInvL2Data, "ld2", desc="local probe inv L2, return data") {
+ MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+ if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
+ enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.add(tcc);
+ tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+
+ }
+ }
+ }
+
+ action(dc_probeInvCoreData, "dc", desc="probe inv cores + TCC, return data") {
+ MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+ enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+
+ out_msg.Destination.addNetDest(cache_entry.Sharers);
+ out_msg.Destination.addNetDest(cache_entry.Owner);
+ tbe.NumPendingAcks := cache_entry.Sharers.count() + cache_entry.Owner.count();
+ if(cache_entry.CacheState == State:M) {
+ assert(tbe.NumPendingAcks == 1);
+ }
+ if (out_msg.Destination.isElement(tcc)) {
+ out_msg.Destination.remove(tcc);
+ tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+ }
+
+ DPRINTF(RubySlicc, "%s\n", (out_msg));
+ }
+ }
+
+ action(d2_probeInvL2Data, "d2", desc="probe inv L2, return data") {
+ MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+ if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
+ enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.add(tcc);
+ tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+
+ }
+ }
+ }
+
+ action(lpc_probeInvCore, "lpc", desc="local probe inv cores, no data") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ TCC_dir_subtree.broadcast(MachineType:TCP);
+ TCC_dir_subtree.broadcast(MachineType:SQC);
+
+ temp := cache_entry.Sharers;
+ temp := temp.OR(cache_entry.Owner);
+ TCC_dir_subtree := TCC_dir_subtree.AND(temp);
+ tbe.NumPendingAcks := TCC_dir_subtree.count();
+ if(cache_entry.CacheState == State:M) {
+ assert(tbe.NumPendingAcks == 1);
+ }
+ if(TCC_dir_subtree.isElement(in_msg.Requestor)) {
+ TCC_dir_subtree.remove(in_msg.Requestor);
+ tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+ }
+
+ if(TCC_dir_subtree.count() > 0) {
+ enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := false;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.localCtoD := true;
+
+ out_msg.Destination.addNetDest(TCC_dir_subtree);
+
+ DPRINTF(RubySlicc, "%s\n", (out_msg));
+ }
+ }
+ }
+ }
+
+ action(ipc_probeInvCore, "ipc", desc="probe inv cores, no data") {
+ TCC_dir_subtree.broadcast(MachineType:TCP);
+ TCC_dir_subtree.broadcast(MachineType:SQC);
+
+ temp := cache_entry.Sharers;
+ temp := temp.OR(cache_entry.Owner);
+ TCC_dir_subtree := TCC_dir_subtree.AND(temp);
+ tbe.NumPendingAcks := TCC_dir_subtree.count();
+ if(TCC_dir_subtree.count() > 0) {
+
+ enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := false;
+ out_msg.MessageSize := MessageSizeType:Control;
+
+ out_msg.Destination.addNetDest(TCC_dir_subtree);
+ if(cache_entry.CacheState == State:M) {
+ assert(tbe.NumPendingAcks == 1);
+ }
+
+ DPRINTF(RubySlicc, "%s\n", (out_msg));
+ }
+ }
+ }
+
+ action(i2_probeInvL2, "i2", desc="probe inv L2, no data") {
+ MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+ if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
+ enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+ tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := false;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.add(tcc);
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+
+ }
+ }
+ }
+
+ action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+ enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+ enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Dirty := false;
+ out_msg.Ntsl := true;
+ out_msg.Hit := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
+ enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Dirty := false; // only true when data is returned
+ out_msg.Hit := false;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+
+
+ action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+ enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+ assert(is_valid(cache_entry) || is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.DataBlk := getDataBlock(address);
+ if (is_valid(tbe)) {
+ out_msg.Dirty := tbe.Dirty;
+ }
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+
+ action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
+ enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+ assert(is_valid(cache_entry) || is_valid(tbe));
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.DataBlk := getDataBlock(address);
+ if (is_valid(tbe)) {
+ out_msg.Dirty := tbe.Dirty;
+ }
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+ action(mc_cancelWB, "mc", desc="send writeback cancel to NB directory") {
+ enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:WrCancel;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Requestor := machineID;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ }
+
+ action(sCS_sendCollectiveResponseS, "sCS", desc="send shared response to all merged TCP/SQC") {
+ enqueue(responseToCore_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := tbe.Sender;
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.CtoD := false;
+ out_msg.State := CoherenceState:Shared;
+ out_msg.Destination.addNetDest(cache_entry.MergedSharers);
+ out_msg.Shared := tbe.Shared;
+ out_msg.Dirty := tbe.Dirty;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(sS_sendResponseS, "sS", desc="send shared response to TCP/SQC") {
+ enqueue(responseToCore_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := tbe.Sender;
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.CtoD := false;
+ out_msg.State := CoherenceState:Shared;
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.Shared := tbe.Shared;
+ out_msg.Dirty := tbe.Dirty;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(sM_sendResponseM, "sM", desc="send response to TCP/SQC") {
+ enqueue(responseToCore_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := tbe.Sender;
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.CtoD := false;
+ out_msg.State := CoherenceState:Modified;
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.Shared := tbe.Shared;
+ out_msg.Dirty := tbe.Dirty;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+
+
+ action(fw2_forwardWBAck, "fw2", desc="forward WBAck to TCC") {
+ peek(responseFromNB_in, ResponseMsg) {
+ if(tbe.OriginalRequestor != machineID) {
+ enqueue(w_respTCC_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysWBAck;
+ out_msg.Sender := machineID;
+ //out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.MessageSize := in_msg.MessageSize;
+ }
+ }
+ }
+ }
+
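+  // A SysAck that arrives while the line is still blocked is saved in the TBE
+  // (sa_saveSysAck) and forwarded to the original requestor later (fsa_forwardSavedAck).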
+ action(sa_saveSysAck, "sa", desc="Save SysAck ") {
+ peek(responseFromNB_in, ResponseMsg) {
+ tbe.Dirty := in_msg.Dirty;
+      if (tbe.Dirty == false) {
+        tbe.DataBlk := in_msg.DataBlk;
+      } // otherwise keep the dirty data already held in the TBE
+ tbe.CtoD := in_msg.CtoD;
+ tbe.CohState := in_msg.State;
+ tbe.Shared := in_msg.Shared;
+ tbe.MessageSize := in_msg.MessageSize;
+ }
+ }
+
+ action(fsa_forwardSavedAck, "fsa", desc="forward saved SysAck to TCP or SQC") {
+ enqueue(responseToCore_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := machineID;
+      out_msg.DataBlk := tbe.DataBlk; // saved data, clean or dirty; dirtiness travels in out_msg.Dirty
+ out_msg.CtoD := tbe.CtoD;
+ out_msg.State := tbe.CohState;
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.Shared := tbe.Shared;
+ out_msg.MessageSize := tbe.MessageSize;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.Sender := tbe.Sender;
+ }
+ }
+
+ action(fa_forwardSysAck, "fa", desc="forward SysAck to TCP or SQC") {
+ peek(responseFromNB_in, ResponseMsg) {
+ enqueue(responseToCore_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := machineID;
+ if (tbe.Dirty == false) {
+ out_msg.DataBlk := in_msg.DataBlk;
+ tbe.Sender := machineID;
+ }
+ else {
+ out_msg.DataBlk := tbe.DataBlk;
+ }
+ out_msg.CtoD := in_msg.CtoD;
+ out_msg.State := in_msg.State;
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.Shared := in_msg.Shared;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.Dirty := in_msg.Dirty;
+ out_msg.Sender := tbe.Sender;
+ DPRINTF(RubySlicc, "%s\n", (out_msg.DataBlk));
+ }
+ }
+ }
+
+ action(pso_probeSharedDataOwner, "pso", desc="probe shared data at owner") {
+ MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+ if (cache_entry.Owner.isElement(tcc)) {
+ enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbDowngrade;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.add(tcc);
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ else { // i.e., owner is a core
+ enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbDowngrade;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.addNetDest(cache_entry.Owner);
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ tbe.NumPendingAcks := 1;
+ }
+
+ action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") {
+ coreRequestNetwork_in.dequeue(clockEdge());
+ }
+
+ action(j_popIncomingUnblockQueue, "j", desc="Pop incoming unblock queue") {
+ unblockNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pk_popResponseQueue, "pk", desc="Pop response queue") {
+ responseNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pp_popProbeQueue, "pp", desc="Pop incoming probe queue") {
+ probeNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pR_popResponseFromNBQueue, "pR", desc="Pop incoming Response queue From NB") {
+ responseFromNB_in.dequeue(clockEdge());
+ }
+
+ action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
+ triggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(pl_popTCCRequestQueue, "pl", desc="pop TCC request queue") {
+ w_TCCRequest_in.dequeue(clockEdge());
+ }
+
+ action(plr_popTCCResponseQueue, "plr", desc="pop TCC response queue") {
+ w_TCCResponse_in.dequeue(clockEdge());
+ }
+
+ action(plu_popTCCUnblockQueue, "plu", desc="pop TCC unblock queue") {
+ w_TCCUnblock_in.dequeue(clockEdge());
+ }
+
+
+ action(m_addUnlockerToSharers, "m", desc="Add the unlocker to the sharer list") {
+ peek(unblockNetwork_in, UnblockMsg) {
+ cache_entry.Sharers.add(in_msg.Sender);
+ cache_entry.MergedSharers.remove(in_msg.Sender);
+ assert(cache_entry.WaitingUnblocks >= 0);
+ cache_entry.WaitingUnblocks := cache_entry.WaitingUnblocks - 1;
+ }
+ }
+
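+  // Read-sharing merge window: concurrent readers are recorded in MergedSharers,
+  // and WaitingUnblocks counts the unblocks still expected before the line settles in S.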
+ action(q_addOutstandingMergedSharer, "q", desc="Increment outstanding requests") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ cache_entry.MergedSharers.add(in_msg.Requestor);
+ cache_entry.WaitingUnblocks := cache_entry.WaitingUnblocks + 1;
+ }
+ }
+
+ action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+ enqueue(unblockToNB_out, UnblockMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(zz_recycleRequest, "\z", desc="Recycle the request queue") {
+ coreRequestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+  action(yy_recycleTCCRequestQueue, "yy", desc="recycle TCC request queue") {
+ w_TCCRequest_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(xz_recycleResponseQueue, "xz", desc="recycle response queue") {
+ responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(xx_recycleTCCResponseQueue, "xx", desc="recycle TCC response queue") {
+ w_TCCResponse_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+  action(vv_recycleTCCUnblockQueue, "vv", desc="Recycle the TCC unblock queue") {
+ w_TCCUnblock_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+  action(xy_recycleUnblockQueue, "xy", desc="Recycle the TCC unblock queue") {
+ w_TCCUnblock_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(ww_recycleProbeRequest, "ww", desc="Recycle the probe request queue") {
+ probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(x_decrementAcks, "x", desc="decrement Acks pending") {
+ tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+ }
+
+ action(o_checkForAckCompletion, "o", desc="check for ack completion") {
+ if (tbe.NumPendingAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ }
+ APPEND_TRANSITION_COMMENT(" tbe acks ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ }
+
+ action(tp_allocateTBE, "tp", desc="allocate TBE Entry for upward transactions") {
+ check_allocate(TBEs);
+ peek(probeNetwork_in, NBProbeRequestMsg) {
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.Dirty := false;
+ tbe.NumPendingAcks := 0;
+ tbe.UntransferredOwnerExists := false;
+ }
+ }
+
+ action(tv_allocateTBE, "tv", desc="allocate TBE Entry for TCC transactions") {
+ check_allocate(TBEs);
+ peek(w_TCCRequest_in, CPURequestMsg) {
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.DataBlk := in_msg.DataBlk; // Data only for WBs
+ tbe.Dirty := false;
+ tbe.OriginalRequestor := in_msg.Requestor;
+ tbe.NumPendingAcks := 0;
+ tbe.UntransferredOwnerExists := false;
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);//check whether resources are full
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
+ tbe.Dirty := false;
+ tbe.Upgrade := false;
+ tbe.OriginalRequestor := in_msg.Requestor;
+ tbe.NumPendingAcks := 0;
+ tbe.UntransferredOwnerExists := false;
+ tbe.Sender := machineID;
+ }
+ }
+
+ action(tr_allocateTBE, "tr", desc="allocate TBE Entry for recall") {
+ check_allocate(TBEs);//check whether resources are full
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
+ tbe.Dirty := false;
+ tbe.Upgrade := false;
+ tbe.OriginalRequestor := machineID; //Recall request, Self initiated
+ tbe.NumPendingAcks := 0;
+ tbe.UntransferredOwnerExists := false;
+ }
+
+ action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+
+ action(d_allocateDir, "d", desc="allocate Directory Cache") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(directory.allocate(address, new Entry));
+ }
+ }
+
+ action(dd_deallocateDir, "dd", desc="deallocate Directory Cache") {
+ if (is_valid(cache_entry)) {
+ directory.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+ enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:StaleNotif;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(wb_data, "wb", desc="write back data") {
+ enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUData;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Shared) {
+ out_msg.NbReqShared := true;
+ } else {
+ out_msg.NbReqShared := false;
+ }
+ out_msg.State := CoherenceState:Shared; // faux info
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
+ assert(is_valid(tbe));
+ tbe.Shared := true;
+ }
+
+ action(y_writeDataToTBE, "y", desc="write Probe Data to TBE") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (!tbe.Dirty || in_msg.Dirty) {
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.Dirty := in_msg.Dirty;
+ }
+ if (in_msg.Hit) {
+ tbe.Cached := true;
+ }
+ }
+ }
+
+ action(ty_writeTCCDataToTBE, "ty", desc="write TCC Probe Data to TBE") {
+ peek(w_TCCResponse_in, ResponseMsg) {
+ if (!tbe.Dirty || in_msg.Dirty) {
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.Dirty := in_msg.Dirty;
+ }
+ if (in_msg.Hit) {
+ tbe.Cached := true;
+ }
+ }
+ }
+
+
+ action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+ directory.setMRU(address);
+ }
+
+ // TRANSITIONS
+
+ // Handling TCP/SQC requests (similar to how the NB dir handles TCC events, with some changes to account for the stateful directory).
+
+
+ // transitions from base
+ transition(I, RdBlk, I_ES){TagArrayRead} {
+ d_allocateDir;
+ t_allocateTBE;
+ n_issueRdBlk;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(I, RdBlkS, I_S){TagArrayRead} {
+ d_allocateDir;
+ t_allocateTBE;
+ nS_issueRdBlkS;
+ i_popIncomingRequestQueue;
+ }
+
+
+ transition(I_S, NB_AckS, BBB_S) {
+ fa_forwardSysAck;
+ pR_popResponseFromNBQueue;
+ }
+
+ transition(I_ES, NB_AckS, BBB_S) {
+ fa_forwardSysAck;
+ pR_popResponseFromNBQueue;
+ }
+
+ transition(I_ES, NB_AckE, BBB_E) {
+ fa_forwardSysAck;
+ pR_popResponseFromNBQueue;
+ }
+
+ transition({S_M, O_M}, {NB_AckCtoD,NB_AckM}, BBB_M) {
+ fa_forwardSysAck;
+ pR_popResponseFromNBQueue;
+ }
+
+ transition(I_M, NB_AckM, BBB_M) {
+ fa_forwardSysAck;
+ pR_popResponseFromNBQueue;
+ }
+
+ transition(BBB_M, CoreUnblock, M){TagArrayWrite} {
+ c_clearOwner;
+ cc_clearSharers;
+ e_ownerIsUnblocker;
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ j_popIncomingUnblockQueue;
+ }
+
+ transition(BBB_S, CoreUnblock, S){TagArrayWrite} {
+ as_addToSharers;
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ j_popIncomingUnblockQueue;
+ }
+
+ transition(BBB_E, CoreUnblock, E){TagArrayWrite} {
+ as_addToSharers;
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ j_popIncomingUnblockQueue;
+ }
+
+
+ transition(I, RdBlkM, I_M){TagArrayRead} {
+ d_allocateDir;
+ t_allocateTBE;
+ nM_issueRdBlkM;
+ i_popIncomingRequestQueue;
+ }
+
+ //
+ transition(S, {RdBlk, RdBlkS}, BBS_S){TagArrayRead} {
+ t_allocateTBE;
+ sc_probeShrCoreData;
+ s2_probeShrL2Data;
+ q_addOutstandingMergedSharer;
+ i_popIncomingRequestQueue;
+ }
+ // Merging of read sharing into a single request
+ transition(BBS_S, {RdBlk, RdBlkS}) {
+ q_addOutstandingMergedSharer;
+ i_popIncomingRequestQueue;
+ }
+ // Wait for probe acks to be complete
+ transition(BBS_S, CPUPrbResp) {
+ ccr_copyCoreResponseToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+
+ transition(BBS_S, TCCPrbResp) {
+ ctr_copyTCCResponseToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+
+ // Window for merging complete with this transition
+ // Send responses to all outstanding
+ transition(BBS_S, ProbeAcksComplete, BB_S) {
+ sCS_sendCollectiveResponseS;
+ pt_popTriggerQueue;
+ }
+
+ transition(BB_S, CoreUnblock, BB_S) {
+ m_addUnlockerToSharers;
+ j_popIncomingUnblockQueue;
+ }
+
+ transition(BB_S, LastCoreUnblock, S) {
+ m_addUnlockerToSharers;
+ dt_deallocateTBE;
+ j_popIncomingUnblockQueue;
+ }
+
+ transition(O, {RdBlk, RdBlkS}, BBO_O){TagArrayRead} {
+ t_allocateTBE;
+ pso_probeSharedDataOwner;
+ q_addOutstandingMergedSharer;
+ i_popIncomingRequestQueue;
+ }
+ // Merging of read sharing into a single request
+ transition(BBO_O, {RdBlk, RdBlkS}) {
+ q_addOutstandingMergedSharer;
+ i_popIncomingRequestQueue;
+ }
+
+ // Wait for probe acks to be complete
+ transition(BBO_O, CPUPrbResp) {
+ ccr_copyCoreResponseToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+
+ transition(BBO_O, TCCPrbResp) {
+ ctr_copyTCCResponseToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+
+ // Window for merging complete with this transition
+ // Send responses to all outstanding
+ transition(BBO_O, ProbeAcksComplete, BB_OO) {
+ sCS_sendCollectiveResponseS;
+ pt_popTriggerQueue;
+ }
+
+ transition(BB_OO, CoreUnblock) {
+ m_addUnlockerToSharers;
+ j_popIncomingUnblockQueue;
+ }
+
+ transition(BB_OO, LastCoreUnblock, O){TagArrayWrite} {
+ m_addUnlockerToSharers;
+ dt_deallocateTBE;
+ j_popIncomingUnblockQueue;
+ }
+
+ transition(S, CPUWrite, BW_S){TagArrayRead} {
+ t_allocateTBE;
+ rC_removeCoreFromSharers;
+ sT_sendRequestToTCC;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(E, CPUWrite, BW_E){TagArrayRead} {
+ t_allocateTBE;
+ rC_removeCoreFromSharers;
+ sT_sendRequestToTCC;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(O, CPUWrite, BW_O){TagArrayRead} {
+ t_allocateTBE;
+ rCo_removeCoreFromOwner;
+ rC_removeCoreFromSharers;
+ sT_sendRequestToTCC;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(M, CPUWrite, BW_M){TagArrayRead} {
+ t_allocateTBE;
+ rCo_removeCoreFromOwner;
+ rC_removeCoreFromSharers;
+ sT_sendRequestToTCC;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(BW_S, TCCUnblock_Sharer, S){TagArrayWrite} {
+ aT_addTCCToSharers;
+ dt_deallocateTBE;
+ plu_popTCCUnblockQueue;
+ }
+
+ transition(BW_S, TCCUnblock_NotValid, S){TagArrayWrite} {
+ dt_deallocateTBE;
+ plu_popTCCUnblockQueue;
+ }
+
+ transition(BW_E, TCCUnblock, E){TagArrayWrite} {
+ cc_clearSharers;
+ aT_addTCCToSharers;
+ dt_deallocateTBE;
+ plu_popTCCUnblockQueue;
+ }
+
+ transition(BW_E, TCCUnblock_NotValid, E) {
+ dt_deallocateTBE;
+ plu_popTCCUnblockQueue;
+ }
+
+ transition(BW_M, TCCUnblock, M) {
+ c_clearOwner;
+ cc_clearSharers;
+ eT_ownerIsUnblocker;
+ dt_deallocateTBE;
+ plu_popTCCUnblockQueue;
+ }
+
+ transition(BW_M, TCCUnblock_NotValid, M) {
+ // Note this transition should only be executed if we received a stale wb
+ dt_deallocateTBE;
+ plu_popTCCUnblockQueue;
+ }
+
+ transition(BW_O, TCCUnblock, O) {
+ c_clearOwner;
+ eT_ownerIsUnblocker;
+ dt_deallocateTBE;
+ plu_popTCCUnblockQueue;
+ }
+
+ transition(BW_O, TCCUnblock_NotValid, O) {
+ // Note this transition should only be executed if we received a stale wb
+ dt_deallocateTBE;
+ plu_popTCCUnblockQueue;
+ }
+
+ // We lost the owner, likely due to an invalidation racing with an 'O' writeback
+ transition(BW_O, TCCUnblock_Sharer, S) {
+ c_clearOwner;
+ aT_addTCCToSharers;
+ dt_deallocateTBE;
+ plu_popTCCUnblockQueue;
+ }
+
+ transition({BW_M, BW_S, BW_E, BW_O}, {PrbInv,PrbInvData,PrbShrData}) {
+ ww_recycleProbeRequest;
+ }
+
+ transition(BRWD_I, {PrbInvData, PrbInv, PrbShrData}) {
+ ww_recycleProbeRequest;
+ }
+
+ // Three step process: locally invalidate others, issue CtoD, wait for NB_AckCtoD
+ transition(S, CtoD, BBS_UM) {TagArrayRead} {
+ t_allocateTBE;
+ lpc_probeInvCore;
+ i2_probeInvL2;
+ o_checkForAckCompletion;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(BBS_UM, CPUPrbResp, BBS_UM) {
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+
+ transition(BBS_UM, TCCPrbResp) {
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+
+ transition(BBS_UM, ProbeAcksComplete, S_M) {
+ rU_rememberUpgrade;
+ nM_issueRdBlkM;
+ pt_popTriggerQueue;
+ }
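+
+ // Putting the pieces together (an illustrative trace only, assuming
+ // one local sharer and no races):
+ //   S      --CtoD-->              BBS_UM  probe local sharers and L2
+ //   BBS_UM --CPUPrbResp-->        BBS_UM  drain acks
+ //   BBS_UM --ProbeAcksComplete--> S_M     issue RdBlkM to the NB
+ //   S_M    --NB_AckM/AckCtoD-->   BBB_M   forward the system ack up
+ //   BBB_M  --CoreUnblock-->       M       requester is now the owner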
+
+ // Three step process: locally invalidate others, issue CtoD, wait for NB_AckCtoD
+ transition(O, CtoD, BBO_UM){TagArrayRead} {
+ t_allocateTBE;
+ lpc_probeInvCore;
+ i2_probeInvL2;
+ o_checkForAckCompletion;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(BBO_UM, CPUPrbResp, BBO_UM) {
+ ruo_rememberUntransferredOwner;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+
+ transition(BBO_UM, TCCPrbResp) {
+ ruoT_rememberUntransferredOwnerTCC;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+
+ transition(BBO_UM, ProbeAcksComplete, O_M) {
+ rU_rememberUpgrade;
+ nM_issueRdBlkM;
+ pt_popTriggerQueue;
+ }
+
+ transition({S,E}, RdBlkM, BBS_M){TagArrayWrite} {
+ t_allocateTBE;
+ ldc_probeInvCoreData;
+ ld2_probeInvL2Data;
+ o_checkForAckCompletion;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(BBS_M, CPUPrbResp) {
+ ccr_copyCoreResponseToTBE;
+ rR_removeResponderFromSharers;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+
+ transition(BBS_M, TCCPrbResp) {
+ ctr_copyTCCResponseToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+
+ transition(BBS_M, ProbeAcksComplete, S_M) {
+ nM_issueRdBlkM;
+ pt_popTriggerQueue;
+ }
+
+ transition(O, RdBlkM, BBO_M){TagArrayRead} {
+ t_allocateTBE;
+ ldc_probeInvCoreData;
+ ld2_probeInvL2Data;
+ o_checkForAckCompletion;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(BBO_M, CPUPrbResp) {
+ ccr_copyCoreResponseToTBE;
+ rR_removeResponderFromSharers;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+
+ transition(BBO_M, TCCPrbResp) {
+ ctr_copyTCCResponseToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+
+ transition(BBO_M, ProbeAcksComplete, O_M) {
+ nM_issueRdBlkM;
+ pt_popTriggerQueue;
+ }
+
+ //
+ transition(M, RdBlkM, BBM_M){TagArrayRead} {
+ t_allocateTBE;
+ ldc_probeInvCoreData;
+ ld2_probeInvL2Data;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(BBM_M, CPUPrbResp) {
+ ccr_copyCoreResponseToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+
+ // TCP recalled block before receiving probe
+ transition({BBM_M, BBS_M, BBO_M}, {CPUWrite,NoCPUWrite}) {
+ zz_recycleRequest;
+ }
+
+ transition(BBM_M, TCCPrbResp) {
+ ctr_copyTCCResponseToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+
+ transition(BBM_M, ProbeAcksComplete, BB_M) {
+ sM_sendResponseM;
+ pt_popTriggerQueue;
+ }
+
+ transition(BB_M, CoreUnblock, M){TagArrayWrite} {
+ e_ownerIsUnblocker;
+ dt_deallocateTBE;
+ j_popIncomingUnblockQueue;
+ }
+
+ transition(M, {RdBlkS, RdBlk}, BBM_O){TagArrayRead} {
+ t_allocateTBE;
+ sc_probeShrCoreData;
+ s2_probeShrL2Data;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(E, {RdBlkS, RdBlk}, BBM_O){TagArrayRead} {
+ t_allocateTBE;
+ eto_moveExSharerToOwner;
+ sc_probeShrCoreData;
+ s2_probeShrL2Data;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(BBM_O, CPUPrbResp) {
+ ccr_copyCoreResponseToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+ transition(BBM_O, TCCPrbResp) {
+ ctr_copyTCCResponseToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+ transition(BBM_O, ProbeAcksComplete, BB_O) {
+ sS_sendResponseS;
+ pt_popTriggerQueue;
+ }
+
+ transition(BB_O, CoreUnblock, O){TagArrayWrite} {
+ as_addToSharers;
+ dt_deallocateTBE;
+ j_popIncomingUnblockQueue;
+ }
+
+ transition({BBO_O, BBM_M, BBS_S, BBM_O, BB_M, BB_O, BB_S, BBO_UM, BBS_UM, BBS_M, BBO_M, BB_OO}, {PrbInvData, PrbInv,PrbShrData}) {
+ ww_recycleProbeRequest;
+ }
+
+ transition({BBM_O, BBS_S, CP_S, CP_O, CP_SM, CP_OM, BBO_O}, {CPUWrite,NoCPUWrite}) {
+ zz_recycleRequest;
+ }
+
+ // stale CtoD raced with external invalidation
+ transition({I, CP_I, B_I, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, CtoD) {
+ i_popIncomingRequestQueue;
+ }
+
+ // stale CtoD raced with internal RdBlkM
+ transition({BBM_M, BBS_M, BBO_M, BBB_M, BBS_UM, BBO_UM}, CtoD) {
+ i_popIncomingRequestQueue;
+ }
+
+ transition({E, M}, CtoD) {
+ i_popIncomingRequestQueue;
+ }
+
+
+ // TCC-directory has sent out (and potentially received acks for) probes.
+ // TCP/SQC replacements (known to be stale by this point) are popped off.
+ transition({BBO_UM, BBS_UM}, {CPUWrite,NoCPUWrite}) {
+ nC_sendNullWBAckToCore;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(S_M, {NoCPUWrite, CPUWrite}) {
+ zz_recycleRequest;
+ }
+
+ transition(O_M, {NoCPUWrite, CPUWrite}) {
+ zz_recycleRequest;
+ }
+
+
+ transition({BBM_M, BBS_M, BBO_M, BBO_UM, BBS_UM}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
+ nT_sendNullWBAckToTCC;
+ pl_popTCCRequestQueue;
+ }
+
+ transition({CP_S, CP_O, CP_OM, CP_SM}, {VicDirty, VicClean, VicDirtyLast, CancelWB, NoVic}) {
+ yy_recycleTCCRequestQueue;
+ }
+
+ // However, when TCCdir has sent out PrbShrData, one cannot ignore these victim requests.
+ transition({BBS_S, BBO_O, BBM_O, S_M, O_M, BBB_M, BBB_S, BBB_E}, {VicDirty, VicClean, VicDirtyLast,CancelWB}) {
+ yy_recycleTCCRequestQueue;
+ }
+
+ transition({BW_S,BW_E,BW_O, BW_M}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
+ yy_recycleTCCRequestQueue;
+ }
+
+ transition({BW_S,BW_E,BW_O, BW_M}, CancelWB) {
+ nT_sendNullWBAckToTCC;
+ pl_popTCCRequestQueue;
+ }
+
+
+ // recycle if waiting for unblocks.
+ transition({BB_M,BB_O,BB_S,BB_OO}, {VicDirty, VicClean, VicDirtyLast,NoVic,CancelWB}) {
+ yy_recycleTCCRequestQueue;
+ }
+
+ transition({BBS_S, BBO_O}, NoVic) {
+ rT_removeTCCFromSharers;
+ nT_sendNullWBAckToTCC;
+ pl_popTCCRequestQueue;
+ }
+
+ // stale. Pop message and send dummy ack.
+ transition({I_S, I_ES, I_M}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
+ nT_sendNullWBAckToTCC;
+ pl_popTCCRequestQueue;
+ }
+
+ transition(M, VicDirtyLast, VM_I){TagArrayRead} {
+ tv_allocateTBE;
+ vd_victim;
+ pl_popTCCRequestQueue;
+ }
+
+ transition(E, VicDirty, VM_I){TagArrayRead} {
+ tv_allocateTBE;
+ vd_victim;
+ pl_popTCCRequestQueue;
+ }
+
+ transition(O, VicDirty, VO_S){TagArrayRead} {
+ tv_allocateTBE;
+ vd_victim;
+ pl_popTCCRequestQueue;
+ }
+
+ transition(O, {VicDirtyLast, VicClean}, VO_I){TagArrayRead} {
+ tv_allocateTBE;
+ vd_victim;
+ pl_popTCCRequestQueue;
+ }
+
+ transition({E, S}, VicClean, VES_I){TagArrayRead} {
+ tv_allocateTBE;
+ vc_victim;
+ pl_popTCCRequestQueue;
+ }
+
+ transition({O, S}, NoVic){TagArrayRead} {
+ rT_removeTCCFromSharers;
+ nT_sendNullWBAckToTCC;
+ pl_popTCCRequestQueue;
+ }
+
+ transition({O,S}, NoCPUWrite){TagArrayRead} {
+ rC_removeCoreFromSharers;
+ nC_sendNullWBAckToCore;
+ i_popIncomingRequestQueue;
+ }
+
+ transition({M,E}, NoCPUWrite){TagArrayRead} {
+ rC_removeCoreFromSharers;
+ nC_sendNullWBAckToCore;
+ i_popIncomingRequestQueue;
+ }
+
+ // This can only happen if it is a race. (TCCdir sent out probes which caused this cancel in the first place.)
+ transition({VM_I, VES_I, VO_I}, CancelWB) {
+ pl_popTCCRequestQueue;
+ }
+
+ transition({VM_I, VES_I, VO_I}, NB_AckWB, I){TagArrayWrite} {
+ c_clearOwner;
+ cc_clearSharers;
+ wb_data;
+ fw2_forwardWBAck;
+ dt_deallocateTBE;
+ dd_deallocateDir;
+ pR_popResponseFromNBQueue;
+ }
+
+ transition(VO_S, NB_AckWB, S){TagArrayWrite} {
+ c_clearOwner;
+ wb_data;
+ fw2_forwardWBAck;
+ dt_deallocateTBE;
+ pR_popResponseFromNBQueue;
+ }
+
+ transition(I_C, NB_AckWB, I){TagArrayWrite} {
+ c_clearOwner;
+ cc_clearSharers;
+ ss_sendStaleNotification;
+ fw2_forwardWBAck;
+ dt_deallocateTBE;
+ dd_deallocateDir;
+ pR_popResponseFromNBQueue;
+ }
+
+ transition(I_W, NB_AckWB, I) {
+ ss_sendStaleNotification;
+ dt_deallocateTBE;
+ dd_deallocateDir;
+ pR_popResponseFromNBQueue;
+ }
+
+
+
+ // Do not handle replacements, reads of any kind, or writebacks from transients; recycle
+ transition({I_M, I_ES, I_S, MO_I, ES_I, S_M, O_M, VES_I, VO_I, VO_S, VM_I, I_C, I_W}, {RdBlkS,RdBlkM,RdBlk,CtoD}) {
+ zz_recycleRequest;
+ }
+
+ transition( VO_S, NoCPUWrite) {
+ zz_recycleRequest;
+ }
+
+ transition({BW_M, BW_S, BW_O, BW_E}, {RdBlkS,RdBlkM,RdBlk,CtoD,NoCPUWrite, CPUWrite}) {
+ zz_recycleRequest;
+ }
+
+ transition({BBB_M, BBB_S, BBB_E, BB_O, BB_M, BB_S, BB_OO}, { RdBlk, RdBlkS, RdBlkM, CPUWrite, NoCPUWrite}) {
+ zz_recycleRequest;
+ }
+
+ transition({BBB_S, BBB_E, BB_O, BB_S, BB_OO}, { CtoD}) {
+ zz_recycleRequest;
+ }
+
+ transition({BBS_UM, BBO_UM, BBM_M, BBM_O, BBS_M, BBO_M}, { RdBlk, RdBlkS, RdBlkM}) {
+ zz_recycleRequest;
+ }
+
+ transition(BBM_O, CtoD) {
+ zz_recycleRequest;
+ }
+
+ transition({BBS_S, BBO_O}, {RdBlkM, CtoD}) {
+ zz_recycleRequest;
+ }
+
+ transition({B_I, CP_I, CP_S, CP_O, CP_OM, CP_SM, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, {RdBlk, RdBlkS, RdBlkM}) {
+ zz_recycleRequest;
+ }
+
+ transition({CP_O, CP_S, CP_OM}, CtoD) {
+ zz_recycleRequest;
+ }
+
+ // Ignore replacement related messages after probe got in.
+ transition({CP_I, B_I, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, {CPUWrite, NoCPUWrite}) {
+ zz_recycleRequest;
+ }
+
+ // Ignore replacement related messages after probes processed
+ transition({I, I_S, I_ES, I_M, I_C, I_W}, {CPUWrite,NoCPUWrite}) {
+ nC_sendNullWBAckToCore;
+ i_popIncomingRequestQueue;
+ }
+ // cannot ignore cancel... otherwise TCP/SQC will be stuck in I_C
+ transition({I, I_S, I_ES, I_M, I_C, I_W, S_M, M, O, E, S}, CPUWriteCancel){TagArrayRead} {
+ nC_sendNullWBAckToCore;
+ i_popIncomingRequestQueue;
+ }
+
+ transition({CP_I, B_I, CP_IOM, CP_ISM, BRWD_I, BRW_I, BRD_I}, {NoVic, VicClean, VicDirty, VicDirtyLast}){
+ nT_sendNullWBAckToTCC;
+ pl_popTCCRequestQueue;
+ }
+
+ // Handling Probes from NB (General process: (1) propagate up, go to blocking state (2) process acks (3) on last ack downward.)
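+ // As a concrete illustration (trace only, assuming the block is M and
+ // dirty data comes back from a core):
+ //   M    --PrbInvData-->        CP_I  probe cores and L2 for data
+ //   CP_I --CPUPrbResp-->        CP_I  y_writeDataToTBE, drain acks
+ //   CP_I --ProbeAcksComplete--> I     pd_sendProbeResponseData to NB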
+
+ // step 1
+ transition({M, O, E, S}, PrbInvData, CP_I){TagArrayRead} {
+ tp_allocateTBE;
+ dc_probeInvCoreData;
+ d2_probeInvL2Data;
+ pp_popProbeQueue;
+ }
+ // step 2a
+ transition(CP_I, CPUPrbResp) {
+ y_writeDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+ // step 2b
+ transition(CP_I, TCCPrbResp) {
+ ty_writeTCCDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+ // step 3
+ transition(CP_I, ProbeAcksComplete, I){TagArrayWrite} {
+ pd_sendProbeResponseData;
+ c_clearOwner;
+ cc_clearSharers;
+ dt_deallocateTBE;
+ dd_deallocateDir;
+ pt_popTriggerQueue;
+ }
+
+ // step 1
+ transition({M, O, E, S}, PrbInv, B_I){TagArrayWrite} {
+ tp_allocateTBE;
+ ipc_probeInvCore;
+ i2_probeInvL2;
+ pp_popProbeQueue;
+ }
+ // step 2
+ transition(B_I, CPUPrbResp) {
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+ // step 2b
+ transition(B_I, TCCPrbResp) {
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+ // step 3
+ transition(B_I, ProbeAcksComplete, I){TagArrayWrite} {
+ // send response down to NB
+ pi_sendProbeResponseInv;
+ c_clearOwner;
+ cc_clearSharers;
+ dt_deallocateTBE;
+ dd_deallocateDir;
+ pt_popTriggerQueue;
+ }
+
+
+ // step 1
+ transition({M, O}, PrbShrData, CP_O){TagArrayRead} {
+ tp_allocateTBE;
+ sc_probeShrCoreData;
+ s2_probeShrL2Data;
+ pp_popProbeQueue;
+ }
+
+ transition(E, PrbShrData, CP_O){TagArrayRead} {
+ tp_allocateTBE;
+ eto_moveExSharerToOwner;
+ sc_probeShrCoreData;
+ s2_probeShrL2Data;
+ pp_popProbeQueue;
+ }
+ // step 2
+ transition(CP_O, CPUPrbResp) {
+ y_writeDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+ // step 2b
+ transition(CP_O, TCCPrbResp) {
+ ty_writeTCCDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+ // step 3
+ transition(CP_O, ProbeAcksComplete, O){TagArrayWrite} {
+ // send response down to NB
+ pd_sendProbeResponseData;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ //step 1
+ transition(S, PrbShrData, CP_S) {
+ tp_allocateTBE;
+ sc_probeShrCoreData;
+ s2_probeShrL2Data;
+ pp_popProbeQueue;
+ }
+ // step 2
+ transition(CP_S, CPUPrbResp) {
+ y_writeDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+ // step 2b
+ transition(CP_S, TCCPrbResp) {
+ ty_writeTCCDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+ // step 3
+ transition(CP_S, ProbeAcksComplete, S) {
+ // send response down to NB
+ pd_sendProbeResponseData;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ // step 1
+ transition(O_M, PrbInvData, CP_IOM) {
+ dc_probeInvCoreData;
+ d2_probeInvL2Data;
+ pp_popProbeQueue;
+ }
+ // step 2a
+ transition(CP_IOM, CPUPrbResp) {
+ y_writeDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+ // step 2b
+ transition(CP_IOM, TCCPrbResp) {
+ ty_writeTCCDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+ // step 3
+ transition(CP_IOM, ProbeAcksComplete, I_M) {
+ pdm_sendProbeResponseDataMs;
+ c_clearOwner;
+ cc_clearSharers;
+ cd_clearDirtyBitTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(CP_IOM, ProbeAcksCompleteReissue, I){TagArrayWrite} {
+ pdm_sendProbeResponseDataMs;
+ c_clearOwner;
+ cc_clearSharers;
+ dt_deallocateTBE;
+ dd_deallocateDir;
+ pt_popTriggerQueue;
+ }
+
+ // step 1
+ transition(S_M, PrbInvData, CP_ISM) {
+ dc_probeInvCoreData;
+ d2_probeInvL2Data;
+ o_checkForAckCompletion;
+ pp_popProbeQueue;
+ }
+ // step 2a
+ transition(CP_ISM, CPUPrbResp) {
+ y_writeDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+ // step 2b
+ transition(CP_ISM, TCCPrbResp) {
+ ty_writeTCCDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+ // step 3
+ transition(CP_ISM, ProbeAcksComplete, I_M) {
+ pdm_sendProbeResponseDataMs;
+ c_clearOwner;
+ cc_clearSharers;
+ cd_clearDirtyBitTBE;
+
+ //dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+ transition(CP_ISM, ProbeAcksCompleteReissue, I){TagArrayWrite} {
+ pim_sendProbeResponseInvMs;
+ c_clearOwner;
+ cc_clearSharers;
+ dt_deallocateTBE;
+ dd_deallocateDir;
+ pt_popTriggerQueue;
+ }
+
+ // step 1
+ transition({S_M, O_M}, {PrbInv}, CP_ISM) {
+ dc_probeInvCoreData;
+ d2_probeInvL2Data;
+ pp_popProbeQueue;
+ }
+ // next steps inherited from CP_ISM
+
+ // Simpler cases
+
+ transition({I_C, I_W}, {PrbInvData, PrbInv, PrbShrData}) {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ // If the directory is certain that the block is not present, it can send an acknowledgement right away;
+ // no need for the three-step process.
+ transition(I, {PrbInv,PrbShrData,PrbInvData}){TagArrayRead} {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition({I_M, I_ES, I_S}, {PrbInv, PrbInvData}) {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition({I_M, I_ES, I_S}, PrbShrData) {
+ prm_sendProbeResponseMiss;
+ pp_popProbeQueue;
+ }
+
+ //step 1
+ transition(S_M, PrbShrData, CP_SM) {
+ sc_probeShrCoreData;
+ s2_probeShrL2Data;
+ o_checkForAckCompletion;
+ pp_popProbeQueue;
+ }
+ // step 2
+ transition(CP_SM, CPUPrbResp) {
+ y_writeDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+ // step 2b
+ transition(CP_SM, TCCPrbResp) {
+ ty_writeTCCDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+ // step 3
+ transition(CP_SM, {ProbeAcksComplete,ProbeAcksCompleteReissue}, S_M){DataArrayRead} {
+ // send response down to NB
+ pd_sendProbeResponseData;
+ pt_popTriggerQueue;
+ }
+
+ //step 1
+ transition(O_M, PrbShrData, CP_OM) {
+ sc_probeShrCoreData;
+ s2_probeShrL2Data;
+ pp_popProbeQueue;
+ }
+ // step 2
+ transition(CP_OM, CPUPrbResp) {
+ y_writeDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+ // step 2b
+ transition(CP_OM, TCCPrbResp) {
+ ty_writeTCCDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+ // step 3
+ transition(CP_OM, {ProbeAcksComplete,ProbeAcksCompleteReissue}, O_M) {
+ // send response down to NB
+ pd_sendProbeResponseData;
+ pt_popTriggerQueue;
+ }
+
+ transition(BRW_I, PrbInvData, I_W) {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({VM_I,VO_I}, PrbInvData, I_C) {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition(VES_I, {PrbInvData,PrbInv}, I_C) {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition({VM_I, VO_I, BRW_I}, PrbInv, I_W) {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition({VM_I, VO_I, VO_S, VES_I, BRW_I}, PrbShrData) {
+ pd_sendProbeResponseData;
+ sf_setSharedFlip;
+ pp_popProbeQueue;
+ }
+
+ transition(VO_S, PrbInvData, CP_OSIW) {
+ dc_probeInvCoreData;
+ d2_probeInvL2Data;
+ pp_popProbeQueue;
+ }
+
+ transition(CP_OSIW, TCCPrbResp) {
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+ transition(CP_OSIW, CPUPrbResp) {
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+
+ transition(CP_OSIW, ProbeAcksComplete, I_C) {
+ pd_sendProbeResponseData;
+ cd_clearDirtyBitTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition({I, S, E, O, M, CP_O, CP_S, CP_OM, CP_SM, CP_OSIW, BW_S, BW_E, BW_O, BW_M, I_M, I_ES, I_S, BBS_S, BBO_O, BBM_M, BBM_O, BB_M, BB_O, BB_OO, BB_S, BBS_M, BBO_M, BBO_UM, BBS_UM, S_M, O_M, BBB_S, BBB_M, BBB_E, VES_I, VM_I, VO_I, VO_S, ES_I, MO_I, I_C, I_W}, StaleVic) {
+ nT_sendNullWBAckToTCC;
+ pl_popTCCRequestQueue;
+ }
+
+ transition({CP_I, B_I, CP_IOM, CP_ISM, BRWD_I, BRW_I, BRD_I}, StaleVic) {
+ nT_sendNullWBAckToTCC;
+ pl_popTCCRequestQueue;
+ }
+
+ // Recall Transitions
+ // transient states still require the directory state
+ transition({M, O}, Recall, BRWD_I) {
+ tr_allocateTBE;
+ vd_victim;
+ dc_probeInvCoreData;
+ d2_probeInvL2Data;
+ }
+
+ transition({E, S}, Recall, BRWD_I) {
+ tr_allocateTBE;
+ vc_victim;
+ dc_probeInvCoreData;
+ d2_probeInvL2Data;
+ }
+
+ transition(I, Recall) {
+ dd_deallocateDir;
+ }
+
+ transition({BRWD_I, BRD_I}, CPUPrbResp) {
+ y_writeDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ pk_popResponseQueue;
+ }
+
+ transition({BRWD_I, BRD_I}, TCCPrbResp) {
+ ty_writeTCCDataToTBE;
+ x_decrementAcks;
+ o_checkForAckCompletion;
+ plr_popTCCResponseQueue;
+ }
+
+ transition(BRWD_I, NB_AckWB, BRD_I) {
+ pR_popResponseFromNBQueue;
+ }
+
+ transition(BRWD_I, ProbeAcksComplete, BRW_I) {
+ pt_popTriggerQueue;
+ }
+
+ transition(BRW_I, NB_AckWB, I) {
+ wb_data;
+ dt_deallocateTBE;
+ dd_deallocateDir;
+ pR_popResponseFromNBQueue;
+ }
+
+ transition(BRD_I, ProbeAcksComplete, I) {
+ wb_data;
+ dt_deallocateTBE;
+ dd_deallocateDir;
+ pt_popTriggerQueue;
+ }
+
+ // wait for stable state for Recall
+ transition({BRWD_I,BRD_I,BRW_I,CP_O, CP_S, CP_OM, CP_SM, CP_OSIW, BW_S, BW_E, BW_O, BW_M, I_M, I_ES, I_S, BBS_S, BBO_O, BBM_M, BBM_O, BB_M, BB_O, BB_OO, BB_S, BBS_M, BBO_M, BBO_UM, BBS_UM, S_M, O_M, BBB_S, BBB_M, BBB_E, VES_I, VM_I, VO_I, VO_S, ES_I, MO_I, I_C, I_W, CP_I}, Recall) {
+ zz_recycleRequest; // stall and wait would be for the wrong address
+ ut_updateTag; // try to find an easier recall
+ }
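+
+ // The pairing above is deliberate (a reading of the two inline
+ // comments, not new behavior): zz_recycleRequest replays the Recall
+ // later, while ut_updateTag marks the blocked line MRU so that the
+ // next victim search should nominate a different, stable line instead
+ // of spinning on this transient one.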
+
+}
diff --git a/src/mem/protocol/GPU_RfO-TCP.sm b/src/mem/protocol/GPU_RfO-TCP.sm
new file mode 100644
index 000000000..6cf9224a6
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO-TCP.sm
@@ -0,0 +1,1009 @@
+/*
+ * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
+ : GPUCoalescer* coalescer;
+ Sequencer* sequencer;
+ bool use_seq_not_coal;
+ CacheMemory * L1cache;
+ int TCC_select_num_bits;
+ Cycles issue_latency := 40; // time to send data down to TCC
+ Cycles l2_hit_latency := 18;
+
+ MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request";
+ MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response";
+ MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";
+
+ MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request";
+ MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response";
+
+ MessageBuffer * mandatoryQueue;
+{
+ state_declaration(State, desc="TCP Cache States", default="TCP_State_I") {
+ I, AccessPermission:Invalid, desc="Invalid";
+ S, AccessPermission:Read_Only, desc="Shared";
+ E, AccessPermission:Read_Write, desc="Exclusive";
+ O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
+ M, AccessPermission:Read_Write, desc="Modified";
+
+ I_M, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+ I_ES, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+ S_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+ O_M, AccessPermission:Read_Only, desc="Owned, issued CtoD, have not seen response yet";
+
+ ES_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack";
+ MO_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for dirty WB ack";
+
+ MO_PI, AccessPermission:Read_Only, desc="L1 downgrade, waiting for CtoD ack (or ProbeInvalidateData)";
+
+ I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCC for canceled WB";
+ }
+
+ enumeration(Event, desc="TCP Events") {
+ // Core initiated
+ Load, desc="Load";
+ Store, desc="Store";
+
+ // TCC initiated
+ TCC_AckS, desc="TCC Ack to Core Request";
+ TCC_AckE, desc="TCC Ack to Core Request";
+ TCC_AckM, desc="TCC Ack to Core Request";
+ TCC_AckCtoD, desc="TCC Ack to Core Request";
+ TCC_AckWB, desc="TCC Ack for clean WB";
+ TCC_NackWB, desc="TCC Nack for clean WB";
+
+ // Mem sys initiated
+ Repl, desc="Replacing block from cache";
+
+ // Probe Events
+ PrbInvData, desc="probe, return O or M data";
+ PrbInv, desc="probe, no need for data";
+ LocalPrbInv, desc="local probe, no need for data";
+ PrbShrData, desc="probe downgrade, return O or M data";
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ DataArrayRead, desc="Read the data array";
+ DataArrayWrite, desc="Write the data array";
+ TagArrayRead, desc="Read the tag array";
+ TagArrayWrite, desc="Write the tag array";
+ }
+
+
+ structure(Entry, desc="...", interface="AbstractCacheEntry") {
+ State CacheState, desc="cache state";
+ bool Dirty, desc="Is the data dirty (diff than memory)?";
+ DataBlock DataBlk, desc="data for the block";
+ bool FromL2, default="false", desc="block just moved from L2";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
+ bool Dirty, desc="Is the data dirty (different than memory)?";
+ int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
+ bool Shared, desc="Victim hit by shared probe";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<TCP_TBE>", constructor="m_number_of_TBEs";
+ int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ // Internal functions
+ Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+ Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
+ return cache_entry;
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return tbe.DataBlk;
+ } else {
+ return getCacheEntry(addr).DataBlk;
+ }
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if(is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+ }
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return TCP_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return TCP_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ bool isValid(Addr addr) {
+ AccessPermission perm := getAccessPermission(addr);
+ if (perm == AccessPermission:NotPresent ||
+ perm == AccessPermission:Invalid ||
+ perm == AccessPermission:Busy) {
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(TCP_State_to_permission(state));
+ }
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+ MachineType getCoherenceType(MachineID myMachID,
+ MachineID senderMachID) {
+ if(myMachID == senderMachID) {
+ return MachineType:TCP;
+ } else if(machineIDToMachineType(senderMachID) == MachineType:TCP) {
+ return MachineType:L1Cache_wCC;
+ } else if(machineIDToMachineType(senderMachID) == MachineType:TCC) {
+ return MachineType:TCC;
+ } else {
+ return MachineType:TCCdir;
+ }
+ }
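+
+ // Illustrative usage, mirroring the xl_loadDone/xs_storeDone actions
+ // later in this file: the mapping lets the coalescer attribute a
+ // remote completion to the proper level of the hierarchy.
+ //
+ //   MachineType cc_mach_type := getCoherenceType(machineID, in_msg.Sender);
+ //   coalescer.readCallback(address, cc_mach_type, cache_entry.DataBlk, ...);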
+
+ // Out Ports
+
+ out_port(requestNetwork_out, CPURequestMsg, requestFromTCP);
+ out_port(responseNetwork_out, ResponseMsg, responseFromTCP);
+ out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
+
+ // In Ports
+
+ in_port(probeNetwork_in, TDProbeRequestMsg, probeToTCP) {
+ if (probeNetwork_in.isReady(clockEdge())) {
+ peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") {
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ DPRINTF(RubySlicc, "machineID: %s\n", machineID);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == ProbeRequestType:PrbInv) {
+ if (in_msg.ReturnData) {
+ trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+ } else {
+ if(in_msg.localCtoD) {
+ trigger(Event:LocalPrbInv, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+ assert(in_msg.ReturnData);
+ trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+ in_port(responseToTCP_in, ResponseMsg, responseToTCP) {
+ if (responseToTCP_in.isReady(clockEdge())) {
+ peek(responseToTCP_in, ResponseMsg, block_on="addr") {
+
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == CoherenceResponseType:TDSysResp) {
+ if (in_msg.State == CoherenceState:Modified) {
+ if (in_msg.CtoD) {
+ trigger(Event:TCC_AckCtoD, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:TCC_AckM, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.State == CoherenceState:Shared) {
+ trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.State == CoherenceState:Exclusive) {
+ trigger(Event:TCC_AckE, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
+ trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) {
+ trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unexpected Response Message to Core");
+ }
+ }
+ }
+ }
+
+ in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+ if (mandatoryQueue_in.isReady(clockEdge())) {
+ peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+ Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+ TBE tbe := TBEs.lookup(in_msg.LineAddress);
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ if (in_msg.Type == RubyRequestType:LD) {
+ if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
+ trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
+ } else {
+ Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else {
+ if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
+ trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
+ } else {
+ Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ }
+ }
+ }
+ }
+
+ // Actions
+
+ action(ic_invCache, "ic", desc="invalidate cache") {
+ if(is_valid(cache_entry)) {
+ L1cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ action(n_issueRdBlk, "n", desc="Issue RdBlk") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlk;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
+
+ action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlkM;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
+
+ action(vd_victim, "vd", desc="Victimize M/O Data") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ assert(is_valid(cache_entry));
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicDirty;
+ out_msg.InitialRequestTime := curCycle();
+ if (cache_entry.CacheState == State:O) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ out_msg.Dirty := cache_entry.Dirty;
+ }
+ }
+
+ action(vc_victim, "vc", desc="Victimize E/S Data") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicClean;
+ out_msg.InitialRequestTime := curCycle();
+ if (cache_entry.CacheState == State:S) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ }
+ }
+
+ action(a_allocate, "a", desc="allocate block") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(L1cache.allocate(address, new Entry));
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ assert(is_valid(cache_entry));
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs
+ tbe.Dirty := cache_entry.Dirty;
+ tbe.Shared := false;
+ }
+
+ action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+ mandatoryQueue_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+ responseToTCP_in.dequeue(clockEdge());
+ }
+
+ action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+ probeNetwork_in.dequeue(clockEdge());
+ }
+
+ action(l_loadDone, "l", desc="local load done") {
+ assert(is_valid(cache_entry));
+ if (use_seq_not_coal) {
+ sequencer.readCallback(address, cache_entry.DataBlk,
+ false, MachineType:TCP);
+ } else {
+ coalescer.readCallback(address, MachineType:TCP, cache_entry.DataBlk);
+ }
+ }
+
+ action(xl_loadDone, "xl", desc="remote load done") {
+ peek(responseToTCP_in, ResponseMsg) {
+ assert(is_valid(cache_entry));
+ if (use_seq_not_coal) {
+ coalescer.recordCPReadCallBack(machineID, in_msg.Sender);
+ sequencer.readCallback(address,
+ cache_entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ } else {
+ MachineType cc_mach_type := getCoherenceType(machineID,
+ in_msg.Sender);
+ coalescer.readCallback(address,
+ cc_mach_type,
+ cache_entry.DataBlk,
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ }
+ }
+ }
+
+ action(s_storeDone, "s", desc="local store done") {
+ assert(is_valid(cache_entry));
+ if (use_seq_not_coal) {
+ coalescer.recordCPWriteCallBack(machineID, machineID);
+ sequencer.writeCallback(address, cache_entry.DataBlk,
+ false, MachineType:TCP);
+ } else {
+ coalescer.writeCallback(address, MachineType:TCP, cache_entry.DataBlk);
+ }
+ cache_entry.Dirty := true;
+ }
+
+ action(xs_storeDone, "xs", desc="remote store done") {
+ peek(responseToTCP_in, ResponseMsg) {
+ assert(is_valid(cache_entry));
+ if (use_seq_not_coal) {
+ coalescer.recordCPWriteCallBack(machineID, in_msg.Sender);
+ sequencer.writeCallback(address,
+ cache_entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ } else {
+ MachineType cc_mach_type := getCoherenceType(machineID,
+ in_msg.Sender);
+ coalescer.writeCallback(address,
+ cc_mach_type,
+ cache_entry.DataBlk,
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ }
+ cache_entry.Dirty := true;
+ }
+ }
+
+ action(w_writeCache, "w", desc="write data to cache") {
+ peek(responseToTCP_in, ResponseMsg) {
+ assert(is_valid(cache_entry));
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := in_msg.Dirty;
+ }
+ }
+
+ action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+ peek(responseToTCP_in, ResponseMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:StaleNotif;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(wb_data, "wb", desc="write back data") {
+ peek(responseToTCP_in, ResponseMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUData;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Shared) {
+ out_msg.NbReqShared := true;
+ } else {
+ out_msg.NbReqShared := false;
+ }
+ out_msg.State := CoherenceState:Shared; // faux info
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(piu_sendProbeResponseInvUntransferredOwnership, "piu", desc="send probe ack inv, no data, retain ownership") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
+ out_msg.Sender := machineID;
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.UntransferredOwner :=true;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.isValid := isValid(address);
+ }
+ }
+
+ action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Dirty := false;
+ out_msg.Ntsl := true;
+ out_msg.Hit := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.isValid := isValid(address);
+ }
+ }
+
+ action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.Dirty := false; // only true if sending back data, I think
+ out_msg.Hit := false;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.isValid := isValid(address);
+ }
+ }
+
+ action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ assert(is_valid(cache_entry) || is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.DataBlk := getDataBlock(address);
+ if (is_valid(tbe)) {
+ out_msg.Dirty := tbe.Dirty;
+ } else {
+ out_msg.Dirty := cache_entry.Dirty;
+ }
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.isValid := isValid(address);
+ APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
+ APPEND_TRANSITION_COMMENT(out_msg.Dirty);
+ }
+ }
+
+ action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ assert(is_valid(cache_entry) || is_valid(tbe));
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.DataBlk := getDataBlock(address);
+ if (is_valid(tbe)) {
+ out_msg.Dirty := tbe.Dirty;
+ } else {
+ out_msg.Dirty := cache_entry.Dirty;
+ }
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.isValid := isValid(address);
+ APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
+ APPEND_TRANSITION_COMMENT(out_msg.Dirty);
+ DPRINTF(RubySlicc, "Data is %s\n", out_msg.DataBlk);
+ }
+ }
+
+ action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
+ assert(is_valid(tbe));
+ tbe.Shared := true;
+ }
+
+ action(mru_updateMRU, "mru", desc="Touch block for replacement policy") {
+ L1cache.setMRU(address);
+ }
+
+ action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+ enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ out_msg.wasValid := isValid(address);
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
+ probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
+ mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ // Transitions
+
+ // transitions from base
+ transition(I, Load, I_ES) {TagArrayRead} {
+ a_allocate;
+ n_issueRdBlk;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, Store, I_M) {TagArrayRead, TagArrayWrite} {
+ a_allocate;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition(S, Store, S_M) {TagArrayRead} {
+ mru_updateMRU;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition(E, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ mru_updateMRU;
+ s_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(O, Store, O_M) {TagArrayRead, DataArrayWrite} {
+ mru_updateMRU;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition(M, Store) {TagArrayRead, DataArrayWrite} {
+ mru_updateMRU;
+ s_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ // simple hit transitions
+ transition({S, E, O, M}, Load) {TagArrayRead, DataArrayRead} {
+ l_loadDone;
+ mru_updateMRU;
+ p_popMandatoryQueue;
+ }
+
+ // recycles from transients
+ transition({I_M, I_ES, ES_I, MO_I, S_M, O_M, MO_PI, I_C}, {Load, Store, Repl}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({S, E}, Repl, ES_I) {TagArrayRead} {
+ t_allocateTBE;
+ vc_victim;
+ ic_invCache;
+ }
+
+ transition({O, M}, Repl, MO_I) {TagArrayRead, DataArrayRead} {
+ t_allocateTBE;
+ vd_victim;
+ ic_invCache;
+ }
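+
+ // Sketch of the resulting writeback handshake (comments only):
+ //   {S,E} --Repl--> ES_I --TCC_AckWB--> I   wb_data sends the clean copy
+ //   {O,M} --Repl--> MO_I --TCC_AckWB--> I   wb_data sends the dirty copy
+ //   either victim state --TCC_NackWB--> I   stale victim, nothing sent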
+
+ // TD event transitions
+ transition(I_M, {TCC_AckM, TCC_AckCtoD}, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ w_writeCache;
+ xs_storeDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_ES, TCC_AckS, S) {TagArrayWrite, DataArrayWrite} {
+ w_writeCache;
+ xl_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_ES, TCC_AckE, E) {TagArrayWrite, DataArrayWrite} {
+ w_writeCache;
+ xl_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition({S_M, O_M}, TCC_AckM, M) {TagArrayWrite, DataArrayWrite} {
+ xs_storeDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition({MO_I, ES_I}, TCC_NackWB, I){TagArrayWrite} {
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition({MO_I, ES_I}, TCC_AckWB, I) {TagArrayWrite, DataArrayRead} {
+ wb_data;
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(I_C, TCC_AckWB, I) {TagArrayWrite} {
+ ss_sendStaleNotification;
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(I_C, TCC_NackWB, I) {TagArrayWrite} {
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ // Probe transitions
+ transition({M, O}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
+ pd_sendProbeResponseData;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(I, PrbInvData) {TagArrayRead, TagArrayWrite} {
+ prm_sendProbeResponseMiss;
+ pp_popProbeQueue;
+ }
+
+ transition({E, S}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
+ pd_sendProbeResponseData;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, PrbInvData, I_C) {} {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ // Needed for TCC-based protocols. Must hold on to ownership till transfer complete
+ transition({M, O}, LocalPrbInv, MO_PI){TagArrayRead, TagArrayWrite} {
+ piu_sendProbeResponseInvUntransferredOwnership;
+ pp_popProbeQueue;
+ }
+
+ // If there is a race and we see a probe invalidate, handle normally.
+ transition(MO_PI, PrbInvData, I){TagArrayWrite} {
+ pd_sendProbeResponseData;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_PI, PrbInv, I){TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ // normal exit when ownership is successfully transferred
+ transition(MO_PI, TCC_AckCtoD, I) {TagArrayWrite} {
+ ic_invCache;
+ pr_popResponseQueue;
+ }
+
+ transition({M, O, E, S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition({E, S, I}, LocalPrbInv, I){TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+
+ transition({M, E, O}, PrbShrData, O) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_PI, PrbShrData) {DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+
+ transition(S, PrbShrData, S) {TagArrayRead, DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({I, I_C}, PrbShrData) {TagArrayRead} {
+ prm_sendProbeResponseMiss;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, PrbInv, I_C) {} {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition({I_M, I_ES}, {PrbInv, PrbInvData}){TagArrayRead} {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ a_allocate; // but make sure there is room for incoming data when it arrives
+ pp_popProbeQueue;
+ }
+
+ transition({I_M, I_ES}, PrbShrData) {} {
+ prm_sendProbeResponseMiss;
+ pp_popProbeQueue;
+ }
+
+ transition(S_M, PrbInvData, I_M) {TagArrayRead} {
+ pim_sendProbeResponseInvMs;
+ ic_invCache;
+ a_allocate;
+ pp_popProbeQueue;
+ }
+
+ transition(O_M, PrbInvData, I_M) {TagArrayRead,DataArrayRead} {
+ pdm_sendProbeResponseDataMs;
+ ic_invCache;
+ a_allocate;
+ pp_popProbeQueue;
+ }
+
+ transition({S_M, O_M}, {PrbInv}, I_M) {TagArrayRead} {
+ pim_sendProbeResponseInvMs;
+ ic_invCache;
+ a_allocate;
+ pp_popProbeQueue;
+ }
+
+ transition(S_M, {LocalPrbInv}, I_M) {TagArrayRead} {
+ pim_sendProbeResponseInvMs;
+ ic_invCache;
+ a_allocate;
+ pp_popProbeQueue;
+ }
+
+ transition(O_M, LocalPrbInv, I_M) {TagArrayRead} {
+ piu_sendProbeResponseInvUntransferredOwnership;
+ ic_invCache;
+ a_allocate;
+ pp_popProbeQueue;
+ }
+
+ transition({S_M, O_M}, PrbShrData) {DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition(ES_I, PrbInvData, I_C){
+ pd_sendProbeResponseData;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbInvData, I_C) {DataArrayRead} {
+ pd_sendProbeResponseData;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbInv, I_C) {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(ES_I, PrbInv, I_C) {
+ pi_sendProbeResponseInv;
+ ic_invCache;
+ pp_popProbeQueue;
+ }
+
+ transition(ES_I, PrbShrData, ES_I) {DataArrayRead} {
+ pd_sendProbeResponseData;
+ sf_setSharedFlip;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbShrData, MO_I) {DataArrayRead} {
+ pd_sendProbeResponseData;
+ sf_setSharedFlip;
+ pp_popProbeQueue;
+ }
+
+}
diff --git a/src/mem/protocol/GPU_RfO.slicc b/src/mem/protocol/GPU_RfO.slicc
new file mode 100644
index 000000000..7773ce6e0
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO.slicc
@@ -0,0 +1,11 @@
+protocol "GPU_AMD_Base";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-dir.sm";
+include "MOESI_AMD_Base-CorePair.sm";
+include "GPU_RfO-TCP.sm";
+include "GPU_RfO-SQC.sm";
+include "GPU_RfO-TCC.sm";
+include "GPU_RfO-TCCdir.sm";
+include "MOESI_AMD_Base-L3cache.sm";
+include "MOESI_AMD_Base-RegionBuffer.sm";
diff --git a/src/mem/protocol/GPU_VIPER-SQC.sm b/src/mem/protocol/GPU_VIPER-SQC.sm
new file mode 100644
index 000000000..8d5b5699a
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER-SQC.sm
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Blake Hechtman
+ */
+
+machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
+ : Sequencer* sequencer;
+ CacheMemory * L1cache;
+ int TCC_select_num_bits;
+ Cycles issue_latency := 80; // time to send data down to TCC
+ Cycles l2_hit_latency := 18; // for 1MB L2, 20 for 2MB
+
+ MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request";
+
+ MessageBuffer * probeToSQC, network="From", virtual_network="1", vnet_type="request";
+ MessageBuffer * responseToSQC, network="From", virtual_network="3", vnet_type="response";
+
+ MessageBuffer * mandatoryQueue;
+{
+ state_declaration(State, desc="SQC Cache States", default="SQC_State_I") {
+ I, AccessPermission:Invalid, desc="Invalid";
+ V, AccessPermission:Read_Only, desc="Valid";
+ }
+
+ enumeration(Event, desc="SQC Events") {
+ // Core initiated
+ Fetch, desc="Fetch";
+ // Mem sys initiated
+ Repl, desc="Replacing block from cache";
+ Data, desc="Received Data";
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ DataArrayRead, desc="Read the data array";
+ DataArrayWrite, desc="Write the data array";
+    TagArrayRead, desc="Read the tag array";
+    TagArrayWrite, desc="Write the tag array";
+ }
+
+
+ structure(Entry, desc="...", interface="AbstractCacheEntry") {
+ State CacheState, desc="cache state";
+ bool Dirty, desc="Is the data dirty (diff than memory)?";
+ DataBlock DataBlk, desc="data for the block";
+ bool FromL2, default="false", desc="block just moved from L2";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
+ bool Dirty, desc="Is the data dirty (different than memory)?";
+ int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
+ bool Shared, desc="Victim hit by shared probe";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<SQC_TBE>", constructor="m_number_of_TBEs";
+ int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
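+  // TCC bank selection: mapAddressToRange() picks the destination TCC by
+  // slicing TCC_select_num_bits address bits starting at TCC_select_low_bit.
+  // Sketch, assuming 64B lines (low bit 6) and two select bits:
+  //   bank := (address >> 6) & 0x3;  // bits [7:6] pick 1 of 4 TCC banks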
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ // Internal functions
+ Tick clockEdge();
+
+ Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+ Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
+ return cache_entry;
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return tbe.DataBlk;
+ } else {
+ return getCacheEntry(addr).DataBlk;
+ }
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if(is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+ }
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes +
+ functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return SQC_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return SQC_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(SQC_State_to_permission(state));
+ }
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+ // Out Ports
+
+ out_port(requestNetwork_out, CPURequestMsg, requestFromSQC);
+
+ // In Ports
+
+ in_port(responseToSQC_in, ResponseMsg, responseToSQC) {
+ if (responseToSQC_in.isReady(clockEdge())) {
+ peek(responseToSQC_in, ResponseMsg, block_on="addr") {
+
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == CoherenceResponseType:TDSysResp) {
+ if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) {
+ trigger(Event:Data, in_msg.addr, cache_entry, tbe);
+ } else {
+ Addr victim := L1cache.cacheProbe(in_msg.addr);
+ trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else {
+ error("Unexpected Response Message to Core");
+ }
+ }
+ }
+ }
+
+ in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+ if (mandatoryQueue_in.isReady(clockEdge())) {
+ peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+ Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+ TBE tbe := TBEs.lookup(in_msg.LineAddress);
+
+ assert(in_msg.Type == RubyRequestType:IFETCH);
+ trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe);
+ }
+ }
+ }
+
+ // Actions
+
+ action(ic_invCache, "ic", desc="invalidate cache") {
+ if(is_valid(cache_entry)) {
+ L1cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlk;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
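+
+  // Note: despite the action name, a plain RdBlk is issued; the SQC only
+  // ever holds read-only (V) copies, so no shared/exclusive variant is
+  // needed.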
+
+ action(a_allocate, "a", desc="allocate block") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(L1cache.allocate(address, new Entry));
+ }
+ }
+
+ action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+ mandatoryQueue_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+ responseToSQC_in.dequeue(clockEdge());
+ }
+
+ action(l_loadDone, "l", desc="local load done") {
+ assert(is_valid(cache_entry));
+ sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
+ APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
+ }
+
+ action(w_writeCache, "w", desc="write data to cache") {
+ peek(responseToSQC_in, ResponseMsg) {
+ assert(is_valid(cache_entry));
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := false;
+ }
+ }
+
+ // Transitions
+
+ // transitions from base
+ transition({I, V}, Repl, I) {TagArrayRead, TagArrayWrite} {
+    ic_invCache;
+ }
+
+ transition(I, Data, V) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+ a_allocate;
+    w_writeCache;
+ l_loadDone;
+ pr_popResponseQueue;
+ }
+
+ transition(I, Fetch) {TagArrayRead, TagArrayWrite} {
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ // simple hit transitions
+ transition(V, Fetch) {TagArrayRead, DataArrayRead} {
+ l_loadDone;
+ p_popMandatoryQueue;
+ }
+}
diff --git a/src/mem/protocol/GPU_VIPER-TCC.sm b/src/mem/protocol/GPU_VIPER-TCC.sm
new file mode 100644
index 000000000..f62df9f4f
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER-TCC.sm
@@ -0,0 +1,739 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Blake Hechtman
+ */
+
+machine(MachineType:TCC, "TCC Cache")
+ : CacheMemory * L2cache;
+ bool WB; /*is this cache Writeback?*/
+ Cycles l2_request_latency := 50;
+ Cycles l2_response_latency := 20;
+
+ // From the TCPs or SQCs
+ MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request";
+ // To the Cores. TCC deals only with TCPs/SQCs.
+ MessageBuffer * responseToCore, network="To", virtual_network="3", vnet_type="response";
+ // From the NB
+ MessageBuffer * probeFromNB, network="From", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseFromNB, network="From", virtual_network="2", vnet_type="response";
+ // To the NB
+ MessageBuffer * requestToNB, network="To", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseToNB, network="To", virtual_network="2", vnet_type="response";
+ MessageBuffer * unblockToNB, network="To", virtual_network="4", vnet_type="unblock";
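+  // Message classes are split across virtual networks (requests on vnets
+  // 0-1, responses on 2-3, unblocks on 4) so responses can always make
+  // progress past stalled requests.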
+
+ MessageBuffer * triggerQueue;
+
+{
+ // EVENTS
+ enumeration(Event, desc="TCC Events") {
+ // Requests coming from the Cores
+ RdBlk, desc="RdBlk event";
+ WrVicBlk, desc="L1 Write Through";
+    WrVicBlkBack, desc="L1 Write Through (dirty cache)";
+    Atomic, desc="Atomic Op";
+    AtomicDone, desc="AtomicOps Complete";
+    AtomicNotDone, desc="AtomicOps not Complete";
+    Data, desc="data message";
+ // Coming from this TCC
+ L2_Repl, desc="L2 Replacement";
+ // Probes
+ PrbInv, desc="Invalidating probe";
+ // Coming from Memory Controller
+ WBAck, desc="writethrough ack from memory";
+ }
+
+ // STATES
+ state_declaration(State, desc="TCC State", default="TCC_State_I") {
+ M, AccessPermission:Read_Write, desc="Modified(dirty cache only)";
+ W, AccessPermission:Read_Write, desc="Written(dirty cache only)";
+ V, AccessPermission:Read_Only, desc="Valid";
+ I, AccessPermission:Invalid, desc="Invalid";
+ IV, AccessPermission:Busy, desc="Waiting for Data";
+ WI, AccessPermission:Busy, desc="Waiting on Writethrough Ack";
+    A, AccessPermission:Busy, desc="Invalid, waiting on atomic Data";
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ DataArrayRead, desc="Read the data array";
+ DataArrayWrite, desc="Write the data array";
+    TagArrayRead, desc="Read the tag array";
+    TagArrayWrite, desc="Write the tag array";
+ }
+
+
+ // STRUCTURES
+
+ structure(Entry, desc="...", interface="AbstractCacheEntry") {
+ State CacheState, desc="cache state";
+ bool Dirty, desc="Is the data dirty (diff from memory?)";
+ DataBlock DataBlk, desc="Data for the block";
+ WriteMask writeMask, desc="Dirty byte mask";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block";
+ bool Dirty, desc="Is the data dirty?";
+ bool Shared, desc="Victim hit by shared probe";
+ MachineID From, desc="Waiting for writeback from...";
+ NetDest Destination, desc="Data destination";
+ int numAtomics, desc="number remaining atomics";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs";
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+
+
+ // FUNCTION DEFINITIONS
+ Tick clockEdge();
+
+ Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+ return static_cast(Entry, "pointer", L2cache.lookup(addr));
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ return getCacheEntry(addr).DataBlk;
+ }
+
+ bool presentOrAvail(Addr addr) {
+ return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+ }
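+
+  // presentOrAvail distinguishes "can fill in place" from "must evict":
+  // the in_ports below trigger L2_Repl on a victim when it returns false.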
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if (is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+ }
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes +
+ functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return TCC_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return TCC_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(TCC_State_to_permission(state));
+ }
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+
+ // ** OUT_PORTS **
+
+ // Three classes of ports
+ // Class 1: downward facing network links to NB
+ out_port(requestToNB_out, CPURequestMsg, requestToNB);
+ out_port(responseToNB_out, ResponseMsg, responseToNB);
+ out_port(unblockToNB_out, UnblockMsg, unblockToNB);
+
+ // Class 2: upward facing ports to GPU cores
+ out_port(responseToCore_out, ResponseMsg, responseToCore);
+
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+ //
+ // request queue going to NB
+ //
+
+
+// ** IN_PORTS **
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue) {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (tbe.numAtomics == 0) {
+ trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
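+
+  // The trigger queue carries only atomic-completion events:
+  // dna_decrementNumAtomics enqueues AtomicDone once the last outstanding
+  // atomic for the line drains, and this port then retires the A state.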
+
+
+
+ in_port(responseFromNB_in, ResponseMsg, responseFromNB) {
+ if (responseFromNB_in.isReady(clockEdge())) {
+ peek(responseFromNB_in, ResponseMsg, block_on="addr") {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+ if(presentOrAvail(in_msg.addr)) {
+ trigger(Event:Data, in_msg.addr, cache_entry, tbe);
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.addr);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+ trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
+ } else {
+          error("Unexpected Response Message to TCC");
+ }
+ }
+ }
+ }
+
+ // Finally handling incoming requests (from TCP) and probes (from NB).
+ in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB) {
+ if (probeNetwork_in.isReady(clockEdge())) {
+ peek(probeNetwork_in, NBProbeRequestMsg) {
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+
+ in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) {
+ if (coreRequestNetwork_in.isReady(clockEdge())) {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ if(WB) {
+ if(presentOrAvail(in_msg.addr)) {
+ trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe);
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.addr);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else {
+ trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+ trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
+ trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+        error("Unexpected Request Message to TCC");
+ }
+ }
+ }
+ }
+ // BEGIN ACTIONS
+
+ action(i_invL2, "i", desc="invalidate TCC cache block") {
+ if (is_valid(cache_entry)) {
+ L2cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ action(sd_sendData, "sd", desc="send Shared response") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Shared;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+
+ action(sdr_sendDataResponse, "sdr", desc="send Shared response") {
+ enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination := tbe.Destination;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Shared;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ enqueue(unblockToNB_out, UnblockMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+
+ action(rd_requestData, "r", desc="Miss in L2, pass on") {
+ if(tbe.Destination.count()==1){
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Shared := false; // unneeded for this request
+ out_msg.MessageSize := in_msg.MessageSize;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+ }
+
+ action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+ peek(responseFromNB_in, ResponseMsg) {
+ enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysWBAck;
+ out_msg.Destination.clear();
+ out_msg.Destination.add(in_msg.WTRequestor);
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(swb_sendWBAck, "swb", desc="send WB Ack") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysWBAck;
+ out_msg.Destination.clear();
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
+ peek(responseFromNB_in, ResponseMsg) {
+ enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Destination.add(in_msg.WTRequestor);
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.DataBlk := in_msg.DataBlk;
+ }
+ }
+ }
+
+ action(a_allocateBlock, "a", desc="allocate TCC block") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(L2cache.allocate(address, new Entry));
+ cache_entry.writeMask.clear();
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ if (is_invalid(tbe)) {
+ check_allocate(TBEs);
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.Destination.clear();
+ tbe.numAtomics := 0;
+ }
+ if (coreRequestNetwork_in.isReady(clockEdge())) {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
+ tbe.Destination.add(in_msg.Requestor);
+ }
+ }
+ }
+ }
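+
+  // tbe.Destination accumulates requestors across merged misses, so the
+  // single NB request (sent only for the first requestor) can be fanned
+  // out by sdr_sendDataResponse to every waiter.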
+
+ action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
+ tbe.Destination.clear();
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(wcb_writeCacheBlock, "wcb", desc="write data to TCC") {
+ peek(responseFromNB_in, ResponseMsg) {
+ cache_entry.DataBlk := in_msg.DataBlk;
+ DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+ }
+ }
+
+ action(wdb_writeDirtyBytes, "wdb", desc="write data to TCC") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ cache_entry.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
+ cache_entry.writeMask.orMask(in_msg.writeMask);
+ DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+ }
+ }
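+
+  // copyPartial writes only the bytes set in the incoming write mask, and
+  // orMask accumulates them, so the entry's mask tracks exactly the bytes
+  // that are dirty with respect to memory.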
+
+ action(wt_writeThrough, "wt", desc="write back data") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.WTRequestor := in_msg.Requestor;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Data;
+ out_msg.Type := CoherenceRequestType:WriteThrough;
+ out_msg.Dirty := true;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.writeMask.orMask(in_msg.writeMask);
+ }
+ }
+ }
+
+ action(wb_writeBack, "wb", desc="write back data") {
+ enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.WTRequestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Data;
+ out_msg.Type := CoherenceRequestType:WriteThrough;
+ out_msg.Dirty := true;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.writeMask.orMask(cache_entry.writeMask);
+ }
+ }
+
+  action(at_atomicThrough, "at", desc="send atomic through to NB") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.WTRequestor := in_msg.Requestor;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Data;
+ out_msg.Type := CoherenceRequestType:Atomic;
+ out_msg.Dirty := true;
+ out_msg.writeMask.orMask(in_msg.writeMask);
+ }
+ }
+ }
+
+ action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+ enqueue(responseToNB_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+ action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+ L2cache.setMRU(address);
+ }
+
+ action(p_popRequestQueue, "p", desc="pop request queue") {
+ coreRequestNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="pop response queue") {
+ responseFromNB_in.dequeue(clockEdge());
+ }
+
+ action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+ probeNetwork_in.dequeue(clockEdge());
+ }
+
+ action(z_stall, "z", desc="stall") {
+ // built-in
+ }
+
+
+ action(ina_incrementNumAtomics, "ina", desc="inc num atomics") {
+ tbe.numAtomics := tbe.numAtomics + 1;
+ }
+
+
+  action(dna_decrementNumAtomics, "dna", desc="dec num atomics") {
+ tbe.numAtomics := tbe.numAtomics - 1;
+ if (tbe.numAtomics==0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AtomicDone;
+ }
+ }
+ }
+
+ action(ptr_popTriggerQueue, "ptr", desc="pop Trigger") {
+ triggerQueue_in.dequeue(clockEdge());
+ }
+
+ // END ACTIONS
+
+ // BEGIN TRANSITIONS
+ // transitions from base
+ // Assumptions for ArrayRead/Write
+ // TBE checked before tags
+ // Data Read/Write requires Tag Read
+
+ // Stalling transitions do NOT check the tag array...and if they do,
+ // they can cause a resource stall deadlock!
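+  // (A transition that stalls while holding a tag or data array
+  // reservation would block the very array access needed to unstall it.)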
+
+ transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} {
+ z_stall;
+ }
+ transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) { //TagArrayRead} {
+ z_stall;
+ }
+ transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} {
+ z_stall;
+ }
+ transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} {
+ sd_sendData;
+ ut_updateTag;
+ p_popRequestQueue;
+ }
+ transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} {
+ t_allocateTBE;
+ wb_writeBack;
+ }
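+
+  // Note: the request is not popped here; it replays and stalls in WI
+  // until the writeback is acked, then misses from I. The {M, W} Atomic
+  // transition below relies on the same replay pattern.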
+
+ transition(I, RdBlk, IV) {TagArrayRead} {
+ t_allocateTBE;
+ rd_requestData;
+ p_popRequestQueue;
+ }
+
+ transition(IV, RdBlk) {
+ t_allocateTBE;
+ rd_requestData;
+ p_popRequestQueue;
+ }
+
+  transition({V, I}, Atomic, A) {TagArrayRead} {
+ i_invL2;
+ t_allocateTBE;
+ at_atomicThrough;
+ ina_incrementNumAtomics;
+ p_popRequestQueue;
+ }
+
+ transition(A, Atomic) {
+ at_atomicThrough;
+ ina_incrementNumAtomics;
+ p_popRequestQueue;
+ }
+
+ transition({M, W}, Atomic, WI) {TagArrayRead} {
+ t_allocateTBE;
+ wb_writeBack;
+ }
+
+ transition(I, WrVicBlk) {TagArrayRead} {
+ wt_writeThrough;
+ p_popRequestQueue;
+ }
+
+ transition(V, WrVicBlk) {TagArrayRead, DataArrayWrite} {
+ ut_updateTag;
+ wdb_writeDirtyBytes;
+ wt_writeThrough;
+ p_popRequestQueue;
+ }
+
+ transition({V, M}, WrVicBlkBack, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ ut_updateTag;
+ swb_sendWBAck;
+ wdb_writeDirtyBytes;
+ p_popRequestQueue;
+ }
+
+ transition(W, WrVicBlkBack) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ ut_updateTag;
+ swb_sendWBAck;
+ wdb_writeDirtyBytes;
+ p_popRequestQueue;
+ }
+
+ transition(I, WrVicBlkBack, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ a_allocateBlock;
+ ut_updateTag;
+ swb_sendWBAck;
+ wdb_writeDirtyBytes;
+ p_popRequestQueue;
+ }
+
+ transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} {
+ t_allocateTBE;
+ wb_writeBack;
+ i_invL2;
+ }
+
+ transition({I, V}, L2_Repl, I) {TagArrayRead, TagArrayWrite} {
+ i_invL2;
+ }
+
+ transition({A, IV, WI}, L2_Repl) {
+ i_invL2;
+ }
+
+ transition({I, V}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition(M, PrbInv, W) {TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition(W, PrbInv) {TagArrayRead} {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition({A, IV, WI}, PrbInv) {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ a_allocateBlock;
+ ut_updateTag;
+ wcb_writeCacheBlock;
+ sdr_sendDataResponse;
+ pr_popResponseQueue;
+ dt_deallocateTBE;
+ }
+
+ transition(A, Data) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ a_allocateBlock;
+ ar_sendAtomicResponse;
+ dna_decrementNumAtomics;
+ pr_popResponseQueue;
+ }
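+
+  // A can see one Data response per outstanding atomic; each decrements
+  // numAtomics, and the trigger queue fires AtomicDone (below) once the
+  // count reaches zero.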
+
+ transition(A, AtomicDone, I) {TagArrayRead, TagArrayWrite} {
+ dt_deallocateTBE;
+ ptr_popTriggerQueue;
+ }
+
+ transition(A, AtomicNotDone) {TagArrayRead} {
+ ptr_popTriggerQueue;
+ }
+
+  // M, W should not see WBAck as the cache is in WB mode.
+  // WBAcks do not need to check tags.
+ transition({I, V, IV, A}, WBAck) {
+ w_sendResponseWBAck;
+ pr_popResponseQueue;
+ }
+
+  transition(WI, WBAck, I) {
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+}
diff --git a/src/mem/protocol/GPU_VIPER-TCP.sm b/src/mem/protocol/GPU_VIPER-TCP.sm
new file mode 100644
index 000000000..d81196b17
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER-TCP.sm
@@ -0,0 +1,747 @@
+/*
+ * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Blake Hechtman
+ */
+
+machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
+ : VIPERCoalescer* coalescer;
+ Sequencer* sequencer;
+ bool use_seq_not_coal;
+ CacheMemory * L1cache;
+ bool WB; /*is this cache Writeback?*/
+ bool disableL1; /* bypass L1 cache? */
+ int TCC_select_num_bits;
+ Cycles issue_latency := 40; // time to send data down to TCC
+ Cycles l2_hit_latency := 18;
+
+ MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request";
+ MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response";
+ MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";
+
+ MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request";
+ MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response";
+ MessageBuffer * mandatoryQueue;
+
+{
+ state_declaration(State, desc="TCP Cache States", default="TCP_State_I") {
+ I, AccessPermission:Invalid, desc="Invalid";
+ V, AccessPermission:Read_Only, desc="Valid";
+ W, AccessPermission:Read_Write, desc="Written";
+ M, AccessPermission:Read_Write, desc="Written and Valid";
+    L, AccessPermission:Read_Write, desc="Local access is modifiable";
+ A, AccessPermission:Invalid, desc="Waiting on Atomic";
+ }
+
+ enumeration(Event, desc="TCP Events") {
+ // Core initiated
+ Load, desc="Load";
+ Store, desc="Store to L1 (L1 is dirty)";
+    StoreThrough, desc="Store directly to L2 (L1 is clean)";
+    StoreLocal, desc="Store to L1 but L1 is clean";
+    Atomic, desc="Atomic";
+    Flush, desc="Flush if dirty (wbL1 for Store Release)";
+    Evict, desc="Evict if clean (invL1 for Load Acquire)";
+ // Mem sys initiated
+ Repl, desc="Replacing block from cache";
+
+ // TCC initiated
+ TCC_Ack, desc="TCC Ack to Core Request";
+ TCC_AckWB, desc="TCC Ack for WB";
+ // Disable L1 cache
+ Bypass, desc="Bypass the entire L1 cache";
+ }
+
+ enumeration(RequestType,
+ desc="To communicate stats from transitions to recordStats") {
+ DataArrayRead, desc="Read the data array";
+ DataArrayWrite, desc="Write the data array";
+    TagArrayRead, desc="Read the tag array";
+    TagArrayWrite, desc="Write the tag array";
+    TagArrayFlash, desc="Flash clear the tag array";
+ }
+
+
+ structure(Entry, desc="...", interface="AbstractCacheEntry") {
+ State CacheState, desc="cache state";
+ bool Dirty, desc="Is the data dirty (diff than memory)?";
+ DataBlock DataBlk, desc="data for the block";
+ bool FromL2, default="false", desc="block just moved from L2";
+ WriteMask writeMask, desc="written bytes masks";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
+ bool Dirty, desc="Is the data dirty (different than memory)?";
+ int NumPendingMsgs,desc="Number of acks/data messages that this processor is waiting for";
+ bool Shared, desc="Victim hit by shared probe";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<TCP_TBE>", constructor="m_number_of_TBEs";
+ int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+ int WTcnt, default="0";
+ int Fcnt, default="0";
+ bool inFlush, default="false";
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ // Internal functions
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+ Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+ Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
+ return cache_entry;
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return tbe.DataBlk;
+ } else {
+ return getCacheEntry(addr).DataBlk;
+ }
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if (is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+ }
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes +
+ functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return TCP_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return TCP_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ bool isValid(Addr addr) {
+ AccessPermission perm := getAccessPermission(addr);
+ if (perm == AccessPermission:NotPresent ||
+ perm == AccessPermission:Invalid ||
+ perm == AccessPermission:Busy) {
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(TCP_State_to_permission(state));
+ }
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:TagArrayFlash) {
+ L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayFlash) {
+ // FIXME should check once per cache, rather than once per cacheline
+ return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+ // Out Ports
+
+ out_port(requestNetwork_out, CPURequestMsg, requestFromTCP);
+
+ // In Ports
+
+ in_port(responseToTCP_in, ResponseMsg, responseToTCP) {
+ if (responseToTCP_in.isReady(clockEdge())) {
+ peek(responseToTCP_in, ResponseMsg, block_on="addr") {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:TDSysResp) {
+ // disable L1 cache
+ if (disableL1) {
+ trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
+ } else {
+ if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) {
+ trigger(Event:TCC_Ack, in_msg.addr, cache_entry, tbe);
+ } else {
+ Addr victim := L1cache.cacheProbe(in_msg.addr);
+ trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ }
+ } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck ||
+ in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+ trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unexpected Response Message to Core");
+ }
+ }
+ }
+ }
+
+ in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+ if (mandatoryQueue_in.isReady(clockEdge())) {
+ peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+ Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+ TBE tbe := TBEs.lookup(in_msg.LineAddress);
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ if (in_msg.Type == RubyRequestType:LD) {
+ trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
+ } else if (in_msg.Type == RubyRequestType:ATOMIC) {
+ trigger(Event:Atomic, in_msg.LineAddress, cache_entry, tbe);
+ } else if (in_msg.Type == RubyRequestType:ST) {
+ if(disableL1) {
+ trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe);
+ } else {
+ if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
+ if (in_msg.segment == HSASegment:SPILL) {
+ trigger(Event:StoreLocal, in_msg.LineAddress, cache_entry, tbe);
+ } else if (WB) {
+ trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
+ } else {
+ trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe);
+ }
+ } else {
+ Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } // end if (disableL1)
+ } else if (in_msg.Type == RubyRequestType:FLUSH) {
+ trigger(Event:Flush, in_msg.LineAddress, cache_entry, tbe);
+ } else if (in_msg.Type == RubyRequestType:REPLACEMENT){
+ trigger(Event:Evict, in_msg.LineAddress, cache_entry, tbe);
+ } else {
+ error("Unexpected Request Message from VIC");
+ if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
+ if (WB) {
+ trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
+ } else {
+ trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe);
+ }
+ } else {
+ Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ }
+ }
+ }
+ }
+
+ // Actions
+
+ action(ic_invCache, "ic", desc="invalidate cache") {
+ if(is_valid(cache_entry)) {
+ cache_entry.writeMask.clear();
+ L1cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ action(n_issueRdBlk, "n", desc="Issue RdBlk") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlk;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
+
+ action(rb_bypassDone, "rb", desc="bypass L1 of read access") {
+ peek(responseToTCP_in, ResponseMsg) {
+      DataBlock tmp := in_msg.DataBlk;
+ if (use_seq_not_coal) {
+ sequencer.readCallback(address, tmp, false, MachineType:L1Cache);
+ } else {
+ coalescer.readCallback(address, MachineType:L1Cache, tmp);
+ }
+ if(is_valid(cache_entry)) {
+ unset_cache_entry();
+ }
+ }
+ }
+
+ action(wab_bypassDone, "wab", desc="bypass L1 of write access") {
+ peek(responseToTCP_in, ResponseMsg) {
+ DataBlock tmp := in_msg.DataBlk;
+ if (use_seq_not_coal) {
+ sequencer.writeCallback(address, tmp, false, MachineType:L1Cache);
+ } else {
+ coalescer.writeCallback(address, MachineType:L1Cache, tmp);
+ }
+ }
+ }
+
+ action(norl_issueRdBlkOrloadDone, "norl", desc="local load done") {
+ peek(mandatoryQueue_in, RubyRequest){
+ if (cache_entry.writeMask.cmpMask(in_msg.writeMask)) {
+ if (use_seq_not_coal) {
+ sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
+ } else {
+ coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
+ }
+ } else {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlk;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
+ }
+ }
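+
+  // Roughly: if the bytes the wavefront asked for are already covered by
+  // the locally written (dirty) bytes, the load completes from the staging
+  // entry; otherwise the whole line is fetched from the TCC.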
+
+ action(wt_writeThrough, "wt", desc="Flush dirty data") {
+ WTcnt := WTcnt + 1;
+ APPEND_TRANSITION_COMMENT("write++ = ");
+ APPEND_TRANSITION_COMMENT(WTcnt);
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ assert(is_valid(cache_entry));
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.writeMask.clear();
+ out_msg.writeMask.orMask(cache_entry.writeMask);
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Data;
+ out_msg.Type := CoherenceRequestType:WriteThrough;
+ out_msg.InitialRequestTime := curCycle();
+ out_msg.Shared := false;
+ }
+ }
+
+ action(at_atomicThrough, "at", desc="send Atomic") {
+ peek(mandatoryQueue_in, RubyRequest) {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.writeMask.clear();
+ out_msg.writeMask.orMask(in_msg.writeMask);
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ out_msg.MessageSize := MessageSizeType:Data;
+ out_msg.Type := CoherenceRequestType:Atomic;
+ out_msg.InitialRequestTime := curCycle();
+ out_msg.Shared := false;
+ }
+ }
+ }
+
+ action(a_allocate, "a", desc="allocate block") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(L1cache.allocate(address, new Entry));
+ }
+ cache_entry.writeMask.clear();
+ }
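+
+  // The write mask is cleared on every allocation so it records only
+  // bytes written since this fill; w_writeCache relies on it to merge
+  // response data around local stores.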
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ }
+
+ action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(sf_setFlush, "sf", desc="set flush") {
+ inFlush := true;
+ APPEND_TRANSITION_COMMENT(" inFlush is true");
+ }
+
+ action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+ mandatoryQueue_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+ responseToTCP_in.dequeue(clockEdge());
+ }
+
+ action(l_loadDone, "l", desc="local load done") {
+ assert(is_valid(cache_entry));
+ if (use_seq_not_coal) {
+ sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
+ } else {
+ coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
+ }
+ }
+
+ action(s_storeDone, "s", desc="local store done") {
+ assert(is_valid(cache_entry));
+
+ if (use_seq_not_coal) {
+ sequencer.writeCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
+ } else {
+ coalescer.writeCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
+ }
+ cache_entry.Dirty := true;
+ }
+
+ action(inv_invDone, "inv", desc="local inv done") {
+ if (use_seq_not_coal) {
+ DPRINTF(RubySlicc, "Sequencer does not define invCallback!\n");
+ assert(false);
+ } else {
+ coalescer.invCallback(address);
+ }
+ }
+
+ action(wb_wbDone, "wb", desc="local wb done") {
+ if (inFlush == true) {
+ Fcnt := Fcnt + 1;
+ if (Fcnt > WTcnt) {
+ if (use_seq_not_coal) {
+ DPRINTF(RubySlicc, "Sequencer does not define wbCallback!\n");
+ assert(false);
+ } else {
+ coalescer.wbCallback(address);
+ }
+ Fcnt := Fcnt - 1;
+ }
+ if (WTcnt == 0 && Fcnt == 0) {
+ inFlush := false;
+ APPEND_TRANSITION_COMMENT(" inFlush is false");
+ }
+ }
+ }
+
+ action(wd_wtDone, "wd", desc="writethrough done") {
+ WTcnt := WTcnt - 1;
+ if (inFlush == true) {
+ Fcnt := Fcnt -1;
+ }
+ assert(WTcnt >= 0);
+ APPEND_TRANSITION_COMMENT("write-- = ");
+ APPEND_TRANSITION_COMMENT(WTcnt);
+ }
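+
+  // Flush bookkeeping (sketch): WTcnt counts write-throughs still awaiting
+  // TCC_AckWB; during a flush, Fcnt paces wb_wbDone so wbCallback is
+  // delivered only after the earlier write-throughs have drained.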
+
+ action(dw_dirtyWrite, "dw", desc="update write mask"){
+ peek(mandatoryQueue_in, RubyRequest) {
+ cache_entry.DataBlk.copyPartial(in_msg.WTData,in_msg.writeMask);
+ cache_entry.writeMask.orMask(in_msg.writeMask);
+ }
+ }
+ action(w_writeCache, "w", desc="write data to cache") {
+ peek(responseToTCP_in, ResponseMsg) {
+ assert(is_valid(cache_entry));
+ DataBlock tmp := in_msg.DataBlk;
+ tmp.copyPartial(cache_entry.DataBlk,cache_entry.writeMask);
+ cache_entry.DataBlk := tmp;
+ }
+ }
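+
+  // Response data is merged under the entry's write mask: the locally
+  // written (dirty) bytes are copied over the incoming block so stores
+  // are never clobbered by older data from the TCC.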
+
+ action(mru_updateMRU, "mru", desc="Touch block for replacement policy") {
+ L1cache.setMRU(address);
+ }
+
+// action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
+// mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+// }
+
+ action(z_stall, "z", desc="stall; built-in") {
+    // built-in action
+ }
+
+ // Transitions
+ // ArrayRead/Write assumptions:
+ // All requests read Tag Array
+ // TBE allocation write the TagArray to I
+ // TBE only checked on misses
+ // Stores will also write dirty bits in the tag
+ // WriteThroughs still need to use cache entry as staging buffer for wavefront
+
+ // Stalling transitions do NOT check the tag array...and if they do,
+ // they can cause a resource stall deadlock!
+
+ transition({A}, {Load, Store, Atomic, StoreThrough}) { //TagArrayRead} {
+ z_stall;
+ }
+
+ transition({M, V, L}, Load) {TagArrayRead, DataArrayRead} {
+ l_loadDone;
+ mru_updateMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, Load) {TagArrayRead} {
+ n_issueRdBlk;
+ p_popMandatoryQueue;
+ }
+
+ transition({V, I}, Atomic, A) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ mru_updateMRU;
+ at_atomicThrough;
+ p_popMandatoryQueue;
+ }
+
+ transition({M, W}, Atomic, A) {TagArrayRead, TagArrayWrite} {
+ wt_writeThrough;
+ t_allocateTBE;
+ at_atomicThrough;
+ ic_invCache;
+ }
+
+ transition(W, Load, I) {TagArrayRead, DataArrayRead} {
+ wt_writeThrough;
+ norl_issueRdBlkOrloadDone;
+ p_popMandatoryQueue;
+ }
+
+ transition({I}, StoreLocal, L) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ a_allocate;
+ dw_dirtyWrite;
+ s_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition({L, V}, StoreLocal, L) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ dw_dirtyWrite;
+ mru_updateMRU;
+ s_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, Store, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ a_allocate;
+ dw_dirtyWrite;
+ s_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(V, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ dw_dirtyWrite;
+ mru_updateMRU;
+ s_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition({M, W}, Store) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ dw_dirtyWrite;
+ mru_updateMRU;
+ s_storeDone;
+ p_popMandatoryQueue;
+ }
+
+  // M, W should not see StoreThrough.
+ transition(I, StoreThrough) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ a_allocate;
+ dw_dirtyWrite;
+ s_storeDone;
+ wt_writeThrough;
+ ic_invCache;
+ p_popMandatoryQueue;
+ }
+
+ transition({V,L}, StoreThrough, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ dw_dirtyWrite;
+ s_storeDone;
+ wt_writeThrough;
+ ic_invCache;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, TCC_Ack, V) {TagArrayRead, TagArrayWrite, DataArrayRead, DataArrayWrite} {
+ a_allocate;
+ w_writeCache;
+ l_loadDone;
+ pr_popResponseQueue;
+ }
+
+ transition(I, Bypass, I) {
+ rb_bypassDone;
+ pr_popResponseQueue;
+ }
+
+  transition(A, Bypass, I) {
+ d_deallocateTBE;
+ wab_bypassDone;
+ pr_popResponseQueue;
+ }
+
+ transition(A, TCC_Ack, I) {TagArrayRead, DataArrayRead, DataArrayWrite} {
+ d_deallocateTBE;
+ a_allocate;
+ w_writeCache;
+ s_storeDone;
+ pr_popResponseQueue;
+ ic_invCache;
+ }
+
+ transition(V, TCC_Ack, V) {TagArrayRead, DataArrayRead, DataArrayWrite} {
+ w_writeCache;
+ l_loadDone;
+ pr_popResponseQueue;
+ }
+
+ transition({W, M}, TCC_Ack, M) {TagArrayRead, TagArrayWrite, DataArrayRead, DataArrayWrite} {
+ w_writeCache;
+ l_loadDone;
+ pr_popResponseQueue;
+ }
+
+ transition({I, V}, Repl, I) {TagArrayRead, TagArrayWrite} {
+ ic_invCache;
+ }
+
+ transition({A}, Repl) {TagArrayRead, TagArrayWrite} {
+ ic_invCache;
+ }
+
+ transition({W, M}, Repl, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+ wt_writeThrough;
+ ic_invCache;
+ }
+
+ transition(L, Repl, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+ wt_writeThrough;
+ ic_invCache;
+ }
+
+ transition({W, M}, Flush, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+ sf_setFlush;
+ wt_writeThrough;
+ ic_invCache;
+ p_popMandatoryQueue;
+ }
+
+  transition({V, I, A, L}, Flush) {TagArrayFlash} {
+ sf_setFlush;
+ wb_wbDone;
+ p_popMandatoryQueue;
+ }
+
+ transition({I, V}, Evict, I) {TagArrayFlash} {
+ inv_invDone;
+ p_popMandatoryQueue;
+ ic_invCache;
+ }
+
+ transition({W, M}, Evict, W) {TagArrayFlash} {
+ inv_invDone;
+ p_popMandatoryQueue;
+ }
+
+ transition({A, L}, Evict) {TagArrayFlash} {
+ inv_invDone;
+ p_popMandatoryQueue;
+ }
+
+ // TCC_AckWB only snoops TBE
+ transition({V, I, A, M, W, L}, TCC_AckWB) {
+ wd_wtDone;
+ wb_wbDone;
+ pr_popResponseQueue;
+ }
+}
diff --git a/src/mem/protocol/GPU_VIPER.slicc b/src/mem/protocol/GPU_VIPER.slicc
new file mode 100644
index 000000000..45f7f3477
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER.slicc
@@ -0,0 +1,9 @@
+protocol "GPU_VIPER";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-dir.sm";
+include "MOESI_AMD_Base-CorePair.sm";
+include "GPU_VIPER-TCP.sm";
+include "GPU_VIPER-SQC.sm";
+include "GPU_VIPER-TCC.sm";
+include "MOESI_AMD_Base-L3cache.sm";
diff --git a/src/mem/protocol/GPU_VIPER_Baseline.slicc b/src/mem/protocol/GPU_VIPER_Baseline.slicc
new file mode 100644
index 000000000..49bdce38c
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER_Baseline.slicc
@@ -0,0 +1,9 @@
+protocol "GPU_VIPER";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-probeFilter.sm";
+include "MOESI_AMD_Base-CorePair.sm";
+include "GPU_VIPER-TCP.sm";
+include "GPU_VIPER-SQC.sm";
+include "GPU_VIPER-TCC.sm";
+include "MOESI_AMD_Base-L3cache.sm";
diff --git a/src/mem/protocol/GPU_VIPER_Region-TCC.sm b/src/mem/protocol/GPU_VIPER_Region-TCC.sm
new file mode 100644
index 000000000..c3aef15a3
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER_Region-TCC.sm
@@ -0,0 +1,773 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor, Blake Hechtman
+ */
+
+/*
+ * This file is inherited from GPU_VIPER-TCC.sm and retains its structure.
+ * Only a few modifications distinguish it from the original VIPER TCC.
+ */
+
+machine(MachineType:TCC, "TCC Cache")
+ : CacheMemory * L2cache;
+ bool WB; /*is this cache Writeback?*/
+ int regionBufferNum;
+ Cycles l2_request_latency := 50;
+ Cycles l2_response_latency := 20;
+
+ // From the TCPs or SQCs
+ MessageBuffer * requestFromTCP, network="From", virtual_network="1", ordered="true", vnet_type="request";
+ // To the Cores. TCC deals only with TCPs/SQCs. CP cores do not communicate directly with TCC.
+ MessageBuffer * responseToCore, network="To", virtual_network="3", ordered="true", vnet_type="response";
+ // From the NB
+ MessageBuffer * probeFromNB, network="From", virtual_network="0", ordered="false", vnet_type="request";
+ MessageBuffer * responseFromNB, network="From", virtual_network="2", ordered="false", vnet_type="response";
+ // To the NB
+ MessageBuffer * requestToNB, network="To", virtual_network="0", ordered="false", vnet_type="request";
+ MessageBuffer * responseToNB, network="To", virtual_network="2", ordered="false", vnet_type="response";
+ MessageBuffer * unblockToNB, network="To", virtual_network="4", ordered="false", vnet_type="unblock";
+
+ MessageBuffer * triggerQueue, ordered="true", random="false";
+{
+ // EVENTS
+ enumeration(Event, desc="TCC Events") {
+ // Requests coming from the Cores
+ RdBlk, desc="RdBlk event";
+ WrVicBlk, desc="L1 Write Through";
+ WrVicBlkBack, desc="L1 Write Back (dirty cache)";
+ Atomic, desc="Atomic Op";
+ AtomicDone, desc="AtomicOps Complete";
+ AtomicNotDone, desc="AtomicOps not Complete";
+ Data, desc="data message";
+ // Coming from this TCC
+ L2_Repl, desc="L2 Replacement";
+ // Probes
+ PrbInv, desc="Invalidating probe";
+ // Coming from Memory Controller
+ WBAck, desc="writethrough ack from memory";
+ }
+
+ // STATES
+ state_declaration(State, desc="TCC State", default="TCC_State_I") {
+ M, AccessPermission:Read_Write, desc="Modified(dirty cache only)";
+ W, AccessPermission:Read_Write, desc="Written(dirty cache only)";
+ V, AccessPermission:Read_Only, desc="Valid";
+ I, AccessPermission:Invalid, desc="Invalid";
+ IV, AccessPermission:Busy, desc="Waiting for Data";
+ WI, AccessPermission:Busy, desc="Waiting on Writethrough Ack";
+ A, AccessPermission:Busy, desc="Invalid waiting on atomic Data";
+ }
+
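+ // Reading aid (lifecycles inferred from the transitions further below,
+ // not new protocol behavior):
+ //   read miss : I --RdBlk--> IV --Data--> V
+ //   dirty fill: I --WrVicBlkBack--> W (--L2_Repl--> WI --WBAck--> I)
+ //   atomic    : {V,I} --Atomic--> A --Data/AtomicDone--> I
+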
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ DataArrayRead, desc="Read the data array";
+ DataArrayWrite, desc="Write the data array";
+ TagArrayRead, desc="Read the tag array";
+ TagArrayWrite, desc="Write the tag array";
+ }
+
+
+ // STRUCTURES
+
+ structure(Entry, desc="...", interface="AbstractCacheEntry") {
+ State CacheState, desc="cache state";
+ bool Dirty, desc="Is the data dirty (different from memory)?";
+ DataBlock DataBlk, desc="Data for the block";
+ WriteMask writeMask, desc="Dirty byte mask";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block";
+ bool Dirty, desc="Is the data dirty?";
+ bool Shared, desc="Victim hit by shared probe";
+ MachineID From, desc="Waiting for writeback from...";
+ NetDest Destination, desc="Data destination";
+ int numAtomics, desc="number remaining atomics";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs";
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+
+
+ // FUNCTION DEFINITIONS
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ MachineID getPeer(MachineID mach) {
+ return createMachineID(MachineType:RegionBuffer, intToID(regionBufferNum));
+ }
+
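+ // In this Region flavor, all NB-bound traffic is funneled through the
+ // per-TCC RegionBuffer: getPeer() above ignores its argument and always
+ // names RegionBuffer[regionBufferNum], e.g.
+ //   out_msg.Destination.add(getPeer(machineID));
+ // as used by wt_writeThrough, wb_writeBack and at_atomicThrough below.
+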
+ Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+ return static_cast(Entry, "pointer", L2cache.lookup(addr));
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ return getCacheEntry(addr).DataBlk;
+ }
+
+ bool presentOrAvail(Addr addr) {
+ return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if (is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+ }
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes +
+ functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
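+ // Functional-access sketch (describing the two routines above): reads
+ // prefer in-flight TBE data and otherwise fall through to memory; writes
+ // update both the TBE copy (if any) and memory, so no stale functional
+ // copy can survive a concurrent transaction.
+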
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return TCC_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return TCC_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(TCC_State_to_permission(state));
+ }
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ L2cache.recordRequestType(CacheRequestType:DataArrayRead,addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ L2cache.recordRequestType(CacheRequestType:DataArrayWrite,addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ L2cache.recordRequestType(CacheRequestType:TagArrayRead,addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ L2cache.recordRequestType(CacheRequestType:TagArrayWrite,addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+
+ // ** OUT_PORTS **
+
+ // Three classes of ports
+ // Class 1: downward facing network links to NB
+ out_port(requestToNB_out, CPURequestMsg, requestToNB);
+ out_port(responseToNB_out, ResponseMsg, responseToNB);
+ out_port(unblockToNB_out, UnblockMsg, unblockToNB);
+
+ // Class 2: upward facing ports to GPU cores
+ out_port(responseToCore_out, ResponseMsg, responseToCore);
+
+ // Class 3: local trigger queue for atomic completion events
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+
+
+// ** IN_PORTS **
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue) {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (tbe.numAtomics == 0) {
+ trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+
+
+ in_port(responseFromNB_in, ResponseMsg, responseFromNB) {
+ if (responseFromNB_in.isReady(clockEdge())) {
+ peek(responseFromNB_in, ResponseMsg, block_on="addr") {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+ if(presentOrAvail(in_msg.addr)) {
+ trigger(Event:Data, in_msg.addr, cache_entry, tbe);
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.addr);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+ trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unexpected Response Message to Core");
+ }
+ }
+ }
+ }
+
+ // Finally handling incoming requests (from TCP) and probes (from NB).
+
+ in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB) {
+ if (probeNetwork_in.isReady(clockEdge())) {
+ peek(probeNetwork_in, NBProbeRequestMsg) {
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ DPRINTF(RubySlicc, "machineID: %s\n", machineID);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+
+
+ in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) {
+ if (coreRequestNetwork_in.isReady(clockEdge())) {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ if(WB) {
+ if(presentOrAvail(in_msg.addr)) {
+ trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe);
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.addr);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else {
+ trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+ trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
+ trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ error("Unexpected Response Message to Core");
+ }
+ }
+ }
+ }
+ // BEGIN ACTIONS
+
+ action(i_invL2, "i", desc="invalidate TCC cache block") {
+ if (is_valid(cache_entry)) {
+ L2cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ // Data available at TCC. Send the DATA to TCP
+ action(sd_sendData, "sd", desc="send Shared response") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Shared;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+
+ // Data was not available at TCC. So, TCC forwarded the request to
+ // directory and directory responded back with data. Now, forward the
+ // DATA to TCP and send the unblock ack back to directory.
+ action(sdr_sendDataResponse, "sdr", desc="send Shared response") {
+ enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination := tbe.Destination;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Shared;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ enqueue(unblockToNB_out, UnblockMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+
+ action(rd_requestData, "r", desc="Miss in L2, pass on") {
+ if (tbe.Destination.count() == 1) {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.Shared := false; // unneeded for this request
+ out_msg.MessageSize := in_msg.MessageSize;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+ }
+
+ action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+ peek(responseFromNB_in, ResponseMsg) {
+ enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysWBAck;
+ out_msg.Destination.clear();
+ out_msg.Destination.add(in_msg.WTRequestor);
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(swb_sendWBAck, "swb", desc="send WB Ack") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysWBAck;
+ out_msg.Destination.clear();
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
+ peek(responseFromNB_in, ResponseMsg) {
+ enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:TDSysResp;
+ out_msg.Destination.add(in_msg.WTRequestor);
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.DataBlk := in_msg.DataBlk;
+ }
+ }
+ }
+ action(sd2rb_sendDone2RegionBuffer, "sd2rb", desc="Request finished, send done ack") {
+ enqueue(unblockToNB_out, UnblockMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.DoneAck := true;
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ if (is_valid(tbe)) {
+ out_msg.Dirty := tbe.Dirty;
+ } else {
+ out_msg.Dirty := false;
+ }
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(a_allocateBlock, "a", desc="allocate TCC block") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(L2cache.allocate(address, new Entry));
+ cache_entry.writeMask.clear();
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ if (is_invalid(tbe)) {
+ check_allocate(TBEs);
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.Destination.clear();
+ tbe.numAtomics := 0;
+ }
+ if (coreRequestNetwork_in.isReady(clockEdge())) {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
+ tbe.Destination.add(in_msg.Requestor);
+ }
+ }
+ }
+ }
+
+ action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
+ tbe.Destination.clear();
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(wcb_writeCacheBlock, "wcb", desc="write data to TCC") {
+ peek(responseFromNB_in, ResponseMsg) {
+ cache_entry.DataBlk := in_msg.DataBlk;
+ DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+ }
+ }
+
+ action(wdb_writeDirtyBytes, "wdb", desc="write data to TCC") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ cache_entry.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
+ cache_entry.writeMask.orMask(in_msg.writeMask);
+ DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+ }
+ }
+
+ action(wt_writeThrough, "wt", desc="write through data") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.WTRequestor := in_msg.Requestor;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Data;
+ out_msg.Type := CoherenceRequestType:WriteThrough;
+ out_msg.Dirty := true;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.writeMask.orMask(in_msg.writeMask);
+ }
+ }
+ }
+
+ action(wb_writeBack, "wb", desc="write back data") {
+ enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.WTRequestor := machineID;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Data;
+ out_msg.Type := CoherenceRequestType:WriteThrough;
+ out_msg.Dirty := true;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.writeMask.orMask(cache_entry.writeMask);
+ }
+ }
+
+ action(at_atomicThrough, "at", desc="atomic through to NB") {
+ peek(coreRequestNetwork_in, CPURequestMsg) {
+ enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.WTRequestor := in_msg.Requestor;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Data;
+ out_msg.Type := CoherenceRequestType:Atomic;
+ out_msg.Dirty := true;
+ out_msg.writeMask.orMask(in_msg.writeMask);
+ }
+ }
+ }
+
+ action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+ enqueue(responseToNB_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+ action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+ L2cache.setMRU(address);
+ }
+
+ action(p_popRequestQueue, "p", desc="pop request queue") {
+ coreRequestNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="pop response queue") {
+ responseFromNB_in.dequeue(clockEdge());
+ }
+
+ action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+ probeNetwork_in.dequeue(clockEdge());
+ }
+ action(zz_recycleRequestQueue, "z", desc="stall") {
+ coreRequestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+
+ action(ina_incrementNumAtomics, "ina", desc="inc num atomics") {
+ tbe.numAtomics := tbe.numAtomics + 1;
+ }
+
+
+ action(dna_decrementNumAtomics, "dna", desc="dec num atomics") {
+ tbe.numAtomics := tbe.numAtomics - 1;
+ if (tbe.numAtomics==0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AtomicDone;
+ }
+ }
+ }
+
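+ // Atomic-completion pattern (reading aid): at_atomicThrough forwards each
+ // atomic and ina_incrementNumAtomics counts it; every returning Data
+ // decrements the count, and on reaching zero a TriggerMsg is enqueued.
+ // triggerQueue_in then raises AtomicDone, which retires the TBE (see the
+ // A-state transitions below).
+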
+ action(ptr_popTriggerQueue, "ptr", desc="pop Trigger") {
+ triggerQueue_in.dequeue(clockEdge());
+ }
+
+ // END ACTIONS
+
+ // BEGIN TRANSITIONS
+ // transitions from base
+ // Assumptions for ArrayRead/Write
+ // TBE checked before tags
+ // Data Read/Write requires Tag Read
+
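+ // Worked example of the annotation convention (assuming standard SLICC
+ // semantics): in
+ //   transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite}
+ // the braces list the cache resources the transition consumes; the
+ // generated controller checks checkResourceAvailable() for each before
+ // firing (stalling otherwise) and records them via recordRequestType().
+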
+ transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) {TagArrayRead} {
+ zz_recycleRequestQueue;
+ }
+ transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) {TagArrayRead} {
+ zz_recycleRequestQueue;
+ }
+ transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) {TagArrayRead} {
+ zz_recycleRequestQueue;
+ }
+ transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} {
+ sd_sendData;
+ ut_updateTag;
+ p_popRequestQueue;
+ }
+ transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} {
+ t_allocateTBE;
+ wb_writeBack;
+ }
+
+ transition(I, RdBlk, IV) {TagArrayRead} {
+ t_allocateTBE;
+ rd_requestData;
+ p_popRequestQueue;
+ }
+
+ transition(IV, RdBlk) {
+ t_allocateTBE;
+ rd_requestData;
+ p_popRequestQueue;
+ }
+
+ transition({V, I}, Atomic, A) {TagArrayRead} {
+ i_invL2;
+ t_allocateTBE;
+ at_atomicThrough;
+ ina_incrementNumAtomics;
+ p_popRequestQueue;
+ }
+
+ transition(A, Atomic) {
+ at_atomicThrough;
+ ina_incrementNumAtomics;
+ p_popRequestQueue;
+ }
+
+ transition({M, W}, Atomic, WI) {TagArrayRead} {
+ t_allocateTBE;
+ wb_writeBack;
+ }
+
+ // The cache block stays in the I state, which implies
+ // this TCC is a write-no-allocate cache
+ transition(I, WrVicBlk) {TagArrayRead} {
+ wt_writeThrough;
+ p_popRequestQueue;
+ }
+
+ transition(V, WrVicBlk) {TagArrayRead, DataArrayWrite} {
+ ut_updateTag;
+ wdb_writeDirtyBytes;
+ wt_writeThrough;
+ p_popRequestQueue;
+ }
+
+ transition({V, M}, WrVicBlkBack, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ ut_updateTag;
+ swb_sendWBAck;
+ wdb_writeDirtyBytes;
+ p_popRequestQueue;
+ }
+
+ transition(W, WrVicBlkBack) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ ut_updateTag;
+ swb_sendWBAck;
+ wdb_writeDirtyBytes;
+ p_popRequestQueue;
+ }
+
+ transition(I, WrVicBlkBack, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ a_allocateBlock;
+ ut_updateTag;
+ swb_sendWBAck;
+ wdb_writeDirtyBytes;
+ p_popRequestQueue;
+ }
+
+ transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} {
+ t_allocateTBE;
+ wb_writeBack;
+ i_invL2;
+ }
+
+ transition({I, V}, L2_Repl, I) {TagArrayRead, TagArrayWrite} {
+ i_invL2;
+ }
+
+ transition({A, IV, WI}, L2_Repl) {
+ i_invL2;
+ }
+
+ transition({I, V}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition(M, PrbInv, W) {TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition(W, PrbInv) {TagArrayRead} {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition({A, IV, WI}, PrbInv) {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ a_allocateBlock;
+ ut_updateTag;
+ wcb_writeCacheBlock;
+ sdr_sendDataResponse;
+ sd2rb_sendDone2RegionBuffer;
+ pr_popResponseQueue;
+ dt_deallocateTBE;
+ }
+
+ transition(A, Data) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+ a_allocateBlock;
+ ar_sendAtomicResponse;
+ sd2rb_sendDone2RegionBuffer;
+ dna_decrementNumAtomics;
+ pr_popResponseQueue;
+ }
+
+ transition(A, AtomicDone, I) {TagArrayRead, TagArrayWrite} {
+ dt_deallocateTBE;
+ ptr_popTriggerQueue;
+ }
+
+ transition(A, AtomicNotDone) {TagArrayRead} {
+ ptr_popTriggerQueue;
+ }
+
+ // M, W should not see WBAck as the cache is in WB mode
+ // WBAcks do not need to check tags
+ transition({I, V, IV, A}, WBAck) {
+ w_sendResponseWBAck;
+ sd2rb_sendDone2RegionBuffer;
+ pr_popResponseQueue;
+ }
+
+ transition(WI, WBAck, I) {
+ sd2rb_sendDone2RegionBuffer;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+}
diff --git a/src/mem/protocol/GPU_VIPER_Region.slicc b/src/mem/protocol/GPU_VIPER_Region.slicc
new file mode 100644
index 000000000..cbfef9de3
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER_Region.slicc
@@ -0,0 +1,11 @@
+protocol "GPU_VIPER_Region";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-Region-CorePair.sm";
+include "MOESI_AMD_Base-L3cache.sm";
+include "MOESI_AMD_Base-Region-dir.sm";
+include "GPU_VIPER_Region-TCC.sm";
+include "GPU_VIPER-TCP.sm";
+include "GPU_VIPER-SQC.sm";
+include "MOESI_AMD_Base-RegionDir.sm";
+include "MOESI_AMD_Base-RegionBuffer.sm";
diff --git a/src/mem/protocol/MOESI_AMD_Base-CorePair.sm b/src/mem/protocol/MOESI_AMD_Base-CorePair.sm
new file mode 100644
index 000000000..76fe77230
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-CorePair.sm
@@ -0,0 +1,2904 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:CorePair, "CP-like Core Coherence")
+ : Sequencer * sequencer;
+ Sequencer * sequencer1;
+ CacheMemory * L1Icache;
+ CacheMemory * L1D0cache;
+ CacheMemory * L1D1cache;
+ CacheMemory * L2cache; // func mem logic looks in this CacheMemory
+ bool send_evictions := "False";
+ Cycles issue_latency := 5; // time to send data down to NB
+ Cycles l2_hit_latency := 18;
+
+ // BEGIN Core Buffers
+
+ // To the Network
+ MessageBuffer * requestFromCore, network="To", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseFromCore, network="To", virtual_network="2", vnet_type="response";
+ MessageBuffer * unblockFromCore, network="To", virtual_network="4", vnet_type="unblock";
+
+ // From the Network
+ MessageBuffer * probeToCore, network="From", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseToCore, network="From", virtual_network="2", vnet_type="response";
+
+ MessageBuffer * mandatoryQueue;
+
+ MessageBuffer * triggerQueue, ordered="true";
+
+ // END Core Buffers
+
+{
+ // BEGIN STATES
+ state_declaration(State, desc="Cache states", default="CorePair_State_I") {
+
+ // Base States
+ I, AccessPermission:Invalid, desc="Invalid";
+ S, AccessPermission:Read_Only, desc="Shared";
+ E0, AccessPermission:Read_Write, desc="Exclusive with Cluster 0 ownership";
+ E1, AccessPermission:Read_Write, desc="Exclusive with Cluster 1 ownership";
+ Es, AccessPermission:Read_Write, desc="Exclusive in core";
+ O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
+ Ms, AccessPermission:Read_Write, desc="Modified in core, both clusters may be sharing line";
+ M0, AccessPermission:Read_Write, desc="Modified with cluster 0 ownership";
+ M1, AccessPermission:Read_Write, desc="Modified with cluster 1 ownership";
+
+ // Transient States
+ I_M0, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+ I_M1, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+ I_M0M1, AccessPermission:Busy, desc="Was in I_M0, got a store request from other cluster as well";
+ I_M1M0, AccessPermission:Busy, desc="Was in I_M1, got a store request from other cluster as well";
+ I_M0Ms, AccessPermission:Busy, desc="Was in I_M0, got a load request from other cluster as well";
+ I_M1Ms, AccessPermission:Busy, desc="Was in I_M1, got a load request from other cluster as well";
+ I_E0S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+ I_E1S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+ I_ES, AccessPermission:Busy, desc="S_F got hit by invalidating probe, RdBlk response needs to go to both clusters";
+
+ IF_E0S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E0S but expecting a L2_to_L1D0 trigger, just drop when receive";
+ IF_E1S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E1S but expecting a L2_to_L1D1 trigger, just drop when receive";
+ IF_ES, AccessPermission:Busy, desc="same, but waiting for two fills";
+ IF0_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one";
+ IF1_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one";
+ F_S0, AccessPermission:Busy, desc="same, but going to S0 when trigger received";
+ F_S1, AccessPermission:Busy, desc="same, but going to S1 when trigger received";
+
+ ES_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for clean writeback ack";
+ MO_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for dirty writeback ack";
+ MO_S0, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS";
+ MO_S1, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS";
+ S_F0, AccessPermission:Read_Only, desc="Shared, filling L1";
+ S_F1, AccessPermission:Read_Only, desc="Shared, filling L1";
+ S_F, AccessPermission:Read_Only, desc="Shared, filling L1";
+ O_F0, AccessPermission:Read_Only, desc="Owned, filling L1";
+ O_F1, AccessPermission:Read_Only, desc="Owned, filling L1";
+ O_F, AccessPermission:Read_Only, desc="Owned, filling L1";
+ Si_F0, AccessPermission:Read_Only, desc="Shared, filling icache";
+ Si_F1, AccessPermission:Read_Only, desc="Shared, filling icache";
+ S_M0, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+ S_M1, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+ O_M0, AccessPermission:Read_Only, desc="Owned, issued CtoD, have not seen response yet";
+ O_M1, AccessPermission:Read_Only, desc="Owned, issued CtoD, have not seen response yet";
+ S0, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 0, waiting for response";
+ S1, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 1, waiting for response";
+
+ Es_F0, AccessPermission:Read_Write, desc="Es, Cluster read, filling";
+ Es_F1, AccessPermission:Read_Write, desc="Es, Cluster read, filling";
+ Es_F, AccessPermission:Read_Write, desc="Es, other cluster read, filling";
+ E0_F, AccessPermission:Read_Write, desc="E0, cluster read, filling";
+ E1_F, AccessPermission:Read_Write, desc="...";
+ E0_Es, AccessPermission:Read_Write, desc="...";
+ E1_Es, AccessPermission:Read_Write, desc="...";
+ Ms_F0, AccessPermission:Read_Write, desc="...";
+ Ms_F1, AccessPermission:Read_Write, desc="...";
+ Ms_F, AccessPermission:Read_Write, desc="...";
+ M0_F, AccessPermission:Read_Write, desc="...";
+ M0_Ms, AccessPermission:Read_Write, desc="...";
+ M1_F, AccessPermission:Read_Write, desc="...";
+ M1_Ms, AccessPermission:Read_Write, desc="...";
+
+ I_C, AccessPermission:Invalid, desc="Invalid, but waiting for WBAck from NB from canceled writeback";
+ S0_C, AccessPermission:Busy, desc="MO_S0 hit by invalidating probe, waiting for WBAck from NB for canceled WB";
+ S1_C, AccessPermission:Busy, desc="MO_S1 hit by invalidating probe, waiting for WBAck from NB for canceled WB";
+ S_C, AccessPermission:Busy, desc="S*_C got NB_AckS, still waiting for WBAck";
+
+ } // END STATES
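+
+ // Naming convention (inferred from the desc strings above): transient
+ // states compose "<current>_<target>", so I_M0 is an Invalid block with a
+ // RdBlkM outstanding for cluster 0, S_F1 is Shared while filling cluster
+ // 1's L1, and MO_I is an M/O victim awaiting its writeback ack.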
+
+ // BEGIN EVENTS
+ enumeration(Event, desc="CP Events") {
+ // CP Initiated events
+ C0_Load_L1miss, desc="Cluster 0 load, L1 missed";
+ C0_Load_L1hit, desc="Cluster 0 load, L1 hit";
+ C1_Load_L1miss, desc="Cluster 1 load, L1 missed";
+ C1_Load_L1hit, desc="Cluster 1 load, L1 hit";
+ Ifetch0_L1hit, desc="Instruction fetch, hit in the L1";
+ Ifetch1_L1hit, desc="Instruction fetch, hit in the L1";
+ Ifetch0_L1miss, desc="Instruction fetch, missed in the L1";
+ Ifetch1_L1miss, desc="Instruction fetch, missed in the L1";
+ C0_Store_L1miss, desc="Cluster 0 store missed in L1";
+ C0_Store_L1hit, desc="Cluster 0 store hit in L1";
+ C1_Store_L1miss, desc="Cluster 1 store missed in L1";
+ C1_Store_L1hit, desc="Cluster 1 store hit in L1";
+ // NB Initiated events
+ NB_AckS, desc="NB Ack to Core Request";
+ NB_AckM, desc="NB Ack to Core Request";
+ NB_AckE, desc="NB Ack to Core Request";
+
+ NB_AckWB, desc="NB Ack for writeback";
+
+ // Memory System initiatied events
+ L1I_Repl, desc="Replace address from L1I"; // Presumed clean
+ L1D0_Repl, desc="Replace address from L1D0"; // Presumed clean
+ L1D1_Repl, desc="Replace address from L1D1"; // Presumed clean
+ L2_Repl, desc="Replace address from L2";
+
+ L2_to_L1D0, desc="L1 fill from L2";
+ L2_to_L1D1, desc="L1 fill from L2";
+ L2_to_L1I, desc="L1 fill from L2";
+
+ // Probe Events
+ PrbInvData, desc="probe, return O or M data";
+ PrbInv, desc="probe, no need for data";
+ PrbShrData, desc="probe downgrade, return O or M data";
+
+ } // END EVENTS
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ L1D0DataArrayRead, desc="Read the data array";
+ L1D0DataArrayWrite, desc="Write the data array";
+ L1D0TagArrayRead, desc="Read the tag array";
+ L1D0TagArrayWrite, desc="Write the tag array";
+ L1D1DataArrayRead, desc="Read the data array";
+ L1D1DataArrayWrite, desc="Write the data array";
+ L1D1TagArrayRead, desc="Read the tag array";
+ L1D1TagArrayWrite, desc="Write the tag array";
+ L1IDataArrayRead, desc="Read the data array";
+ L1IDataArrayWrite, desc="Write the data array";
+ L1ITagArrayRead, desc="Read the tag array";
+ L1ITagArrayWrite, desc="Write the tag array";
+ L2DataArrayRead, desc="Read the data array";
+ L2DataArrayWrite, desc="Write the data array";
+ L2TagArrayRead, desc="Read the tag array";
+ L2TagArrayWrite, desc="Write the tag array";
+ }
+
+
+ // BEGIN STRUCTURE DEFINITIONS
+
+
+ // Cache Entry
+ structure(Entry, desc="...", interface="AbstractCacheEntry") {
+ State CacheState, desc="cache state";
+ bool Dirty, desc="Is the data dirty (different from memory)?";
+ DataBlock DataBlk, desc="data for the block";
+ bool FromL2, default="false", desc="block just moved from L2";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
+ bool Dirty, desc="Is the data dirty (different from memory)?";
+ int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
+ bool Shared, desc="Victim hit by shared probe";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<CorePair_TBE>", constructor="m_number_of_TBEs";
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ // END STRUCTURE DEFINITIONS
+
+ // BEGIN INTERNAL FUNCTIONS
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ bool addressInCore(Addr addr) {
+ return (L2cache.isTagPresent(addr) || L1Icache.isTagPresent(addr) || L1D0cache.isTagPresent(addr) || L1D1cache.isTagPresent(addr));
+ }
+
+ Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+ Entry L2cache_entry := static_cast(Entry, "pointer", L2cache.lookup(address));
+ return L2cache_entry;
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return tbe.DataBlk;
+ } else {
+ return getCacheEntry(addr).DataBlk;
+ }
+ }
+
+ Entry getL1CacheEntry(Addr addr, int cluster), return_by_pointer="yes" {
+ if (cluster == 0) {
+ Entry L1D0_entry := static_cast(Entry, "pointer", L1D0cache.lookup(addr));
+ return L1D0_entry;
+ } else {
+ Entry L1D1_entry := static_cast(Entry, "pointer", L1D1cache.lookup(addr));
+ return L1D1_entry;
+ }
+ }
+
+ Entry getICacheEntry(Addr addr), return_by_pointer="yes" {
+ Entry c_entry := static_cast(Entry, "pointer", L1Icache.lookup(addr));
+ return c_entry;
+ }
+
+ bool presentOrAvail2(Addr addr) {
+ return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+ }
+
+ bool presentOrAvailI(Addr addr) {
+ return L1Icache.isTagPresent(addr) || L1Icache.cacheAvail(addr);
+ }
+
+ bool presentOrAvailD0(Addr addr) {
+ return L1D0cache.isTagPresent(addr) || L1D0cache.cacheAvail(addr);
+ }
+
+ bool presentOrAvailD1(Addr addr) {
+ return L1D1cache.isTagPresent(addr) || L1D1cache.cacheAvail(addr);
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if(is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+ }
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return CorePair_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return CorePair_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(CorePair_State_to_permission(state));
+ }
+ }
+
+ MachineType testAndClearLocalHit(Entry cache_entry) {
+ assert(is_valid(cache_entry));
+ if (cache_entry.FromL2) {
+ cache_entry.FromL2 := false;
+ return MachineType:L2Cache;
+ } else {
+ return MachineType:L1Cache;
+ }
+ }
+
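+ // Hit-attribution sketch (based on FromL2 and testAndClearLocalHit above):
+ // a fill out of the L2 marks the entry FromL2, so the first subsequent L1
+ // hit is charged to MachineType:L2Cache exactly once, after which the line
+ // counts as a plain L1 hit.
+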
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:L1D0DataArrayRead) {
+ L1D0cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L1D0DataArrayWrite) {
+ L1D0cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L1D0TagArrayRead) {
+ L1D0cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L1D0TagArrayWrite) {
+ L1D0cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ } else if (request_type == RequestType:L1D1DataArrayRead) {
+ L1D1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L1D1DataArrayWrite) {
+ L1D1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L1D1TagArrayRead) {
+ L1D1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L1D1TagArrayWrite) {
+ L1D1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ } else if (request_type == RequestType:L1IDataArrayRead) {
+ L1Icache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L1IDataArrayWrite) {
+ L1Icache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L1ITagArrayRead) {
+ L1Icache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L1ITagArrayWrite) {
+ L1Icache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ } else if (request_type == RequestType:L2DataArrayRead) {
+ L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L2DataArrayWrite) {
+ L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L2TagArrayRead) {
+ L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L2TagArrayWrite) {
+ L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:L2DataArrayRead) {
+ return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L2DataArrayWrite) {
+ return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L2TagArrayRead) {
+ return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L2TagArrayWrite) {
+ return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1D0DataArrayRead) {
+ return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1D0DataArrayWrite) {
+ return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1D0TagArrayRead) {
+ return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1D0TagArrayWrite) {
+ return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1D1DataArrayRead) {
+ return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1D1DataArrayWrite) {
+ return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1D1TagArrayRead) {
+ return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1D1TagArrayWrite) {
+ return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1IDataArrayRead) {
+ return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1IDataArrayWrite) {
+ return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1ITagArrayRead) {
+ return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1ITagArrayWrite) {
+ return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+
+ } else {
+ return true;
+ }
+ }
+
+ // END INTERNAL FUNCTIONS
+
+ // ** OUT_PORTS **
+
+ out_port(requestNetwork_out, CPURequestMsg, requestFromCore);
+ out_port(responseNetwork_out, ResponseMsg, responseFromCore);
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+ out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
+
+ // ** IN_PORTS **
+
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue, block_on="addr") {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == TriggerType:L2_to_L1) {
+ if (in_msg.Dest == CacheId:L1I) {
+ trigger(Event:L2_to_L1I, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Dest == CacheId:L1D0) {
+ trigger(Event:L2_to_L1D0, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Dest == CacheId:L1D1) {
+ trigger(Event:L2_to_L1D1, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("unexpected trigger dest");
+ }
+ }
+ }
+ }
+ }
+
+
+ in_port(probeNetwork_in, NBProbeRequestMsg, probeToCore) {
+ if (probeNetwork_in.isReady(clockEdge())) {
+ peek(probeNetwork_in, NBProbeRequestMsg, block_on="addr") {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == ProbeRequestType:PrbInv) {
+ if (in_msg.ReturnData) {
+ trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+ assert(in_msg.ReturnData);
+ trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+
+ // ResponseNetwork
+ in_port(responseToCore_in, ResponseMsg, responseToCore) {
+ if (responseToCore_in.isReady(clockEdge())) {
+ peek(responseToCore_in, ResponseMsg, block_on="addr") {
+
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+ if (in_msg.State == CoherenceState:Modified) {
+ trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.State == CoherenceState:Shared) {
+ trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.State == CoherenceState:Exclusive) {
+ trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+ trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unexpected Response Message to Core");
+ }
+ }
+ }
+ }
+
+ // Nothing from the Unblock Network
+
+ // Mandatory Queue
+ in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+ if (mandatoryQueue_in.isReady(clockEdge())) {
+ peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+
+ Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+ TBE tbe := TBEs.lookup(in_msg.LineAddress);
+
+ if (in_msg.Type == RubyRequestType:IFETCH) {
+ // FETCH ACCESS
+
+ if (L1Icache.isTagPresent(in_msg.LineAddress)) {
+ if (mod(in_msg.contextId, 2) == 0) {
+ trigger(Event:Ifetch0_L1hit, in_msg.LineAddress, cache_entry, tbe);
+ } else {
+ trigger(Event:Ifetch1_L1hit, in_msg.LineAddress, cache_entry, tbe);
+ }
+ } else {
+ if (presentOrAvail2(in_msg.LineAddress)) {
+ if (presentOrAvailI(in_msg.LineAddress)) {
+ if (mod(in_msg.contextId, 2) == 0) {
+ trigger(Event:Ifetch0_L1miss, in_msg.LineAddress, cache_entry,
+ tbe);
+ } else {
+ trigger(Event:Ifetch1_L1miss, in_msg.LineAddress, cache_entry,
+ tbe);
+ }
+ } else {
+ Addr victim := L1Icache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:L1I_Repl, victim,
+ getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else { // Not present or avail in L2
+ Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+ TBEs.lookup(victim));
+ }
+ }
+ } else {
+ // DATA ACCESS
+ if (mod(in_msg.contextId, 2) == 1) {
+ if (L1D1cache.isTagPresent(in_msg.LineAddress)) {
+ if (in_msg.Type == RubyRequestType:LD) {
+ trigger(Event:C1_Load_L1hit, in_msg.LineAddress, cache_entry,
+ tbe);
+ } else {
+ // Stores must write through, make sure L2 avail.
+ if (presentOrAvail2(in_msg.LineAddress)) {
+ trigger(Event:C1_Store_L1hit, in_msg.LineAddress, cache_entry,
+ tbe);
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+ TBEs.lookup(victim));
+ }
+ }
+ } else {
+ if (presentOrAvail2(in_msg.LineAddress)) {
+ if (presentOrAvailD1(in_msg.LineAddress)) {
+ if (in_msg.Type == RubyRequestType:LD) {
+ trigger(Event:C1_Load_L1miss, in_msg.LineAddress,
+ cache_entry, tbe);
+ } else {
+ trigger(Event:C1_Store_L1miss, in_msg.LineAddress,
+ cache_entry, tbe);
+ }
+ } else {
+ Addr victim := L1D1cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:L1D1_Repl, victim,
+ getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else { // not present or avail in L2
+ Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ }
+ } else {
+ Entry L1D0cache_entry := getL1CacheEntry(in_msg.LineAddress, 0);
+ if (is_valid(L1D0cache_entry)) {
+ if (in_msg.Type == RubyRequestType:LD) {
+ trigger(Event:C0_Load_L1hit, in_msg.LineAddress, cache_entry,
+ tbe);
+ } else {
+ if (presentOrAvail2(in_msg.LineAddress)) {
+ trigger(Event:C0_Store_L1hit, in_msg.LineAddress, cache_entry,
+ tbe);
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+ TBEs.lookup(victim));
+ }
+ }
+ } else {
+ if (presentOrAvail2(in_msg.LineAddress)) {
+ if (presentOrAvailD0(in_msg.LineAddress)) {
+ if (in_msg.Type == RubyRequestType:LD) {
+ trigger(Event:C0_Load_L1miss, in_msg.LineAddress,
+ cache_entry, tbe);
+ } else {
+ trigger(Event:C0_Store_L1miss, in_msg.LineAddress,
+ cache_entry, tbe);
+ }
+ } else {
+ Addr victim := L1D0cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:L1D0_Repl, victim, getCacheEntry(victim),
+ TBEs.lookup(victim));
+ }
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+ TBEs.lookup(victim));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
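+ // Dispatch outline for the mandatory queue above (reading aid only):
+ //   IFETCH: L1I hit -> Ifetch{0,1}_L1hit (contextId parity picks cluster);
+ //           a miss needs both an L2 and an L1I slot, else L1I_Repl/L2_Repl.
+ //   LD/ST : contextId parity picks the D-cache cluster; stores always
+ //           require an L2 slot because the L1s write through to the L2.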
+
+ // ACTIONS
+ action(ii_invIcache, "ii", desc="invalidate iCache") {
+ if (L1Icache.isTagPresent(address)) {
+ L1Icache.deallocate(address);
+ }
+ }
+
+ action(i0_invCluster, "i0", desc="invalidate cluster 0") {
+ if (L1D0cache.isTagPresent(address)) {
+ L1D0cache.deallocate(address);
+ }
+ }
+
+ action(i1_invCluster, "i1", desc="invalidate cluster 1") {
+ if (L1D1cache.isTagPresent(address)) {
+ L1D1cache.deallocate(address);
+ }
+ }
+
+ action(ib_invBothClusters, "ib", desc="invalidate both clusters") {
+ if (L1D0cache.isTagPresent(address)) {
+ L1D0cache.deallocate(address);
+ }
+ if (L1D1cache.isTagPresent(address)) {
+ L1D1cache.deallocate(address);
+ }
+ }
+
+ action(i2_invL2, "i2", desc="invalidate L2") {
+ if(is_valid(cache_entry)) {
+ L2cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ action(mru_setMRU, "mru", desc="Update LRU state") {
+ L2cache.setMRU(address);
+ }
+
+ action(mruD1_setD1cacheMRU, "mruD1", desc="Update LRU state") {
+ L1D1cache.setMRU(address);
+ }
+
+ action(mruD0_setD0cacheMRU, "mruD0", desc="Update LRU state") {
+ L1D0cache.setMRU(address);
+ }
+
+ action(mruI_setIcacheMRU, "mruI", desc="Update LRU state") {
+ L1Icache.setMRU(address);
+ }
+
+ action(n_issueRdBlk, "n", desc="Issue RdBlk") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlk;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ DPRINTF(RubySlicc,"%s\n",out_msg.Destination);
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
+
+ action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlkM;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
+
+ action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlkS;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
+
+ action(vd_victim, "vd", desc="Victimize M/O L2 Data") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ assert(is_valid(cache_entry));
+ out_msg.DataBlk := cache_entry.DataBlk;
+ assert(cache_entry.Dirty);
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicDirty;
+ out_msg.InitialRequestTime := curCycle();
+ if (cache_entry.CacheState == State:O) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ }
+ }
+
+ action(vc_victim, "vc", desc="Victimize E/S L2 Data") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicClean;
+ out_msg.InitialRequestTime := curCycle();
+ if (cache_entry.CacheState == State:S) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ }
+ }
+
+ action(a0_allocateL1D, "a0", desc="Allocate L1D0 Block") {
+ if (L1D0cache.isTagPresent(address) == false) {
+ L1D0cache.allocateVoid(address, new Entry);
+ }
+ }
+
+ action(a1_allocateL1D, "a1", desc="Allocate L1D1 Block") {
+ if (L1D1cache.isTagPresent(address) == false) {
+ L1D1cache.allocateVoid(address, new Entry);
+ }
+ }
+
+ action(ai_allocateL1I, "ai", desc="Allocate L1I Block") {
+ if (L1Icache.isTagPresent(address) == false) {
+ L1Icache.allocateVoid(address, new Entry);
+ }
+ }
+
+ action(a2_allocateL2, "a2", desc="Allocate L2 Block") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(L2cache.allocate(address, new Entry));
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ assert(is_valid(cache_entry));
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs
+ tbe.Dirty := cache_entry.Dirty;
+ tbe.Shared := false;
+ }
+
+ action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+ mandatoryQueue_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+ responseToCore_in.dequeue(clockEdge());
+ }
+
+ action(pt_popTriggerQueue, "pt", desc="Pop Trigger Queue") {
+ triggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+ probeNetwork_in.dequeue(clockEdge());
+ }
+
+ action(il0_loadDone, "il0", desc="Cluster 0 i load done") {
+ Entry entry := getICacheEntry(address);
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ assert(is_valid(entry));
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer.readCallback(address,
+ l2entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ }
+
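+ // Callback sketch (describing the *_loadDone actions in this block): the
+ // local-hit variants pass the hit flag plus the machine type returned by
+ // testAndClearLocalHit() for hit attribution, while the x*_loadDone
+ // variants flip that flag and forward the responder's machine type and the
+ // timestamps carried in the response for latency accounting.
+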
+ action(il1_loadDone, "il1", desc="Cluster 1 i load done") {
+ Entry entry := getICacheEntry(address);
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ assert(is_valid(entry));
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer1.readCallback(address,
+ l2entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ }
+
+ action(l0_loadDone, "l0", desc="Cluster 0 load done") {
+ Entry entry := getL1CacheEntry(address, 0);
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ assert(is_valid(entry));
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer.readCallback(address,
+ l2entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ }
+
+ action(l1_loadDone, "l1", desc="Cluster 1 load done") {
+ Entry entry := getL1CacheEntry(address, 1);
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ assert(is_valid(entry));
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer1.readCallback(address,
+ l2entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ }
+
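+  // The x-prefixed callbacks below complete loads satisfied externally by
+  // the directory or L3, so they pass the request timing back to the
+  // sequencer; the l*/il* variants above complete local hits.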
+ action(xl0_loadDone, "xl0", desc="Cluster 0 load done") {
+ peek(responseToCore_in, ResponseMsg) {
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ DPRINTF(ProtocolTrace, "CP Load Done 0 -- address %s, data: %s\n", address, l2entry.DataBlk);
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer.readCallback(address,
+ l2entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ }
+ }
+
+ action(xl1_loadDone, "xl1", desc="Cluster 1 load done") {
+ peek(responseToCore_in, ResponseMsg) {
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer1.readCallback(address,
+ l2entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ }
+ }
+
+ action(xi0_loadDone, "xi0", desc="Cluster 0 i-load done") {
+ peek(responseToCore_in, ResponseMsg) {
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer.readCallback(address,
+ l2entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ }
+ }
+
+ action(xi1_loadDone, "xi1", desc="Cluster 1 i-load done") {
+ peek(responseToCore_in, ResponseMsg) {
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer1.readCallback(address,
+ l2entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ }
+ }
+
+ action(s0_storeDone, "s0", desc="Cluster 0 store done") {
+ Entry entry := getL1CacheEntry(address, 0);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ sequencer.writeCallback(address,
+ cache_entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ cache_entry.Dirty := true;
+ entry.DataBlk := cache_entry.DataBlk;
+ entry.Dirty := true;
+ DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+ }
+
+ action(s1_storeDone, "s1", desc="Cluster 1 store done") {
+ Entry entry := getL1CacheEntry(address, 1);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ sequencer1.writeCallback(address,
+ cache_entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ cache_entry.Dirty := true;
+ entry.Dirty := true;
+ entry.DataBlk := cache_entry.DataBlk;
+ DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+ }
+
+ action(xs0_storeDone, "xs0", desc="Cluster 0 store done") {
+ peek(responseToCore_in, ResponseMsg) {
+ Entry entry := getL1CacheEntry(address, 0);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ sequencer.writeCallback(address,
+ cache_entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ cache_entry.Dirty := true;
+ entry.Dirty := true;
+ entry.DataBlk := cache_entry.DataBlk;
+ DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+ }
+ }
+
+ action(xs1_storeDone, "xs1", desc="Cluster 1 store done") {
+ peek(responseToCore_in, ResponseMsg) {
+ Entry entry := getL1CacheEntry(address, 1);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ sequencer1.writeCallback(address,
+ cache_entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ cache_entry.Dirty := true;
+ entry.Dirty := true;
+ entry.DataBlk := cache_entry.DataBlk;
+ DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+ }
+ }
+
+ action(forward_eviction_to_cpu0, "fec0", desc="sends eviction information to processor0") {
+ if (send_evictions) {
+ DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
+ sequencer.evictionCallback(address);
+ }
+ }
+
+ action(forward_eviction_to_cpu1, "fec1", desc="sends eviction information to processor1") {
+ if (send_evictions) {
+ DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
+ sequencer1.evictionCallback(address);
+ }
+ }
+
+ action(ci_copyL2ToL1, "ci", desc="copy L2 data to L1") {
+ Entry entry := getICacheEntry(address);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.Dirty := cache_entry.Dirty;
+ entry.DataBlk := cache_entry.DataBlk;
+ entry.FromL2 := true;
+ }
+
+ action(c0_copyL2ToL1, "c0", desc="copy L2 data to L1") {
+ Entry entry := getL1CacheEntry(address, 0);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.Dirty := cache_entry.Dirty;
+ entry.DataBlk := cache_entry.DataBlk;
+ entry.FromL2 := true;
+ }
+
+ action(c1_copyL2ToL1, "c1", desc="copy L2 data to L1") {
+ Entry entry := getL1CacheEntry(address, 1);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.Dirty := cache_entry.Dirty;
+ entry.DataBlk := cache_entry.DataBlk;
+ entry.FromL2 := true;
+ }
+
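+  // L2-to-L1 fills are not instantaneous: the fi/f0/f1 actions below
+  // enqueue a trigger after l2_hit_latency, and the matching L2_to_L1
+  // transitions later complete the copy via the c*_copyL2ToL1 actions.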
+ action(fi_L2ToL1, "fi", desc="L2 to L1 inst fill") {
+ enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:L2_to_L1;
+ out_msg.Dest := CacheId:L1I;
+ }
+ }
+
+ action(f0_L2ToL1, "f0", desc="L2 to L1 data fill") {
+ enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:L2_to_L1;
+ out_msg.Dest := CacheId:L1D0;
+ }
+ }
+
+ action(f1_L2ToL1, "f1", desc="L2 to L1 data fill") {
+ enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:L2_to_L1;
+ out_msg.Dest := CacheId:L1D1;
+ }
+ }
+
+ action(wi_writeIcache, "wi", desc="write data to icache (and l2)") {
+ peek(responseToCore_in, ResponseMsg) {
+ Entry entry := getICacheEntry(address);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.DataBlk := in_msg.DataBlk;
+ entry.Dirty := in_msg.Dirty;
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := in_msg.Dirty;
+ }
+ }
+
+ action(w0_writeDcache, "w0", desc="write data to dcache 0 (and l2)") {
+ peek(responseToCore_in, ResponseMsg) {
+ Entry entry := getL1CacheEntry(address, 0);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ DPRINTF(ProtocolTrace, "CP writeD0: address %s, data: %s\n", address, in_msg.DataBlk);
+ entry.DataBlk := in_msg.DataBlk;
+ entry.Dirty := in_msg.Dirty;
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := in_msg.Dirty;
+ }
+ }
+
+ action(w1_writeDcache, "w1", desc="write data to dcache 1 (and l2)") {
+ peek(responseToCore_in, ResponseMsg) {
+ Entry entry := getL1CacheEntry(address, 1);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.DataBlk := in_msg.DataBlk;
+ entry.Dirty := in_msg.Dirty;
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := in_msg.Dirty;
+ }
+ }
+
+ action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+ peek(responseToCore_in, ResponseMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:StaleNotif;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(wb_data, "wb", desc="write back data") {
+ peek(responseToCore_in, ResponseMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUData;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Shared) {
+ out_msg.NbReqShared := true;
+ } else {
+ out_msg.NbReqShared := false;
+ }
+ out_msg.State := CoherenceState:Shared; // faux info
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
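+  // Probe responses: pi/pim ack an invalidating probe without data, ph
+  // acks a sharing probe that hit, pb additionally reports whether the
+  // block is still resident in an L1, and pd/pdm/pdt return dirty data
+  // (pdt sourcing it from the TBE while a writeback is in flight).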
+ action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.Dirty := false;
+ out_msg.Ntsl := true;
+ out_msg.Hit := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(ph_sendProbeResponseHit, "ph", desc="send probe ack PrbShrData, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ assert(addressInCore(address) || is_valid(tbe));
+      out_msg.Dirty := false; // only true when sending data back
+ out_msg.Hit := true;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pb_sendProbeResponseBackprobe, "pb", desc="send probe ack PrbShrData, no data, check for L1 residence") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ if (addressInCore(address)) {
+ out_msg.Hit := true;
+ } else {
+ out_msg.Hit := false;
+ }
+ out_msg.Dirty := false; // not sending back data, so def. not dirty
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.DataBlk := cache_entry.DataBlk;
+ assert(cache_entry.Dirty);
+ out_msg.Dirty := true;
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+ action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.DataBlk := cache_entry.DataBlk;
+ assert(cache_entry.Dirty);
+ out_msg.Dirty := true;
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+ action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ assert(is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.DataBlk := tbe.DataBlk;
+ assert(tbe.Dirty);
+ out_msg.Dirty := true;
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+ action(s_setSharedFlip, "s", desc="hit by shared probe, status may be different") {
+ assert(is_valid(tbe));
+ tbe.Shared := true;
+ }
+
+ action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+ enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
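+  // Demand-miss profiling: each action below increments the demand-miss
+  // counter of the corresponding cache.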
+ action(l2m_profileMiss, "l2m", desc="l2m miss profile") {
+ ++L2cache.demand_misses;
+ }
+
+ action(l10m_profileMiss, "l10m", desc="l10m miss profile") {
+ ++L1D0cache.demand_misses;
+ }
+
+ action(l11m_profileMiss, "l11m", desc="l11m miss profile") {
+ ++L1D1cache.demand_misses;
+ }
+
+  action(l1im_profileMiss, "l1im", desc="l1im miss profile") {
+ ++L1Icache.demand_misses;
+ }
+
+ action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
+ probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(xx_recycleResponseQueue, "xx", desc="recycle response queue") {
+ responseToCore_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
+ mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ // END ACTIONS
+
+ // BEGIN TRANSITIONS
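+  // Transitions are written as transition(state, event[, next state]),
+  // followed by the set of tag/data array resources the transition
+  // accesses, then the list of actions executed in order.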
+
+ // transitions from base
+ transition(I, C0_Load_L1miss, I_E0S) {L1D0TagArrayRead, L2TagArrayRead} {
+ // track misses, if implemented
+ // since in I state, L2 miss as well
+ l2m_profileMiss;
+ l10m_profileMiss;
+ a0_allocateL1D;
+ a2_allocateL2;
+ i1_invCluster;
+ ii_invIcache;
+ n_issueRdBlk;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, C1_Load_L1miss, I_E1S) {L1D1TagArrayRead, L2TagArrayRead} {
+ // track misses, if implemented
+ // since in I state, L2 miss as well
+ l2m_profileMiss;
+ l11m_profileMiss;
+ a1_allocateL1D;
+ a2_allocateL2;
+ i0_invCluster;
+ ii_invIcache;
+ n_issueRdBlk;
+ p_popMandatoryQueue;
+ }
+
+  transition(I, Ifetch0_L1miss, S0) {L1ITagArrayRead, L2TagArrayRead} {
+ // track misses, if implemented
+ // L2 miss as well
+ l2m_profileMiss;
+ l1im_profileMiss;
+ ai_allocateL1I;
+ a2_allocateL2;
+ ib_invBothClusters;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} {
+ // track misses, if implemented
+ // L2 miss as well
+ l2m_profileMiss;
+ l1im_profileMiss;
+ ai_allocateL1I;
+ a2_allocateL2;
+ ib_invBothClusters;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, C0_Store_L1miss, I_M0) {L1D0TagArrayRead, L2TagArrayRead} {
+ l2m_profileMiss;
+ l10m_profileMiss;
+ a0_allocateL1D;
+ a2_allocateL2;
+ i1_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, C1_Store_L1miss, I_M1) {L1D0TagArrayRead, L2TagArrayRead} {
+ l2m_profileMiss;
+ l11m_profileMiss;
+ a1_allocateL1D;
+ a2_allocateL2;
+ i0_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition(S, C0_Load_L1miss, S_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition(S, C1_Load_L1miss, S_F1) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(S, Ifetch0_L1miss, Si_F0) {L1ITagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l1im_profileMiss;
+ ai_allocateL1I;
+ fi_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition(S, Ifetch1_L1miss, Si_F1) {L1ITagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l1im_profileMiss;
+ ai_allocateL1I;
+ fi_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition({S}, {C0_Store_L1hit, C0_Store_L1miss}, S_M0) {L1D0TagArrayRead, L2TagArrayRead} {
+ l2m_profileMiss;
+ l10m_profileMiss;
+ a0_allocateL1D;
+ mruD0_setD0cacheMRU;
+ i1_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition({S}, {C1_Store_L1hit, C1_Store_L1miss}, S_M1) {L1D1TagArrayRead, L2TagArrayRead} {
+ l2m_profileMiss;
+ l11m_profileMiss;
+ a1_allocateL1D;
+ mruD1_setD1cacheMRU;
+ i0_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition(Es, C0_Load_L1miss, Es_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} { // can this be folded with S_F?
+ a0_allocateL1D;
+ l10m_profileMiss;
+ f0_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(Es, C1_Load_L1miss, Es_F1) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} { // can this be folded with S_F?
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(Es, Ifetch0_L1miss, S0) {L1ITagArrayRead, L1ITagArrayWrite, L2TagArrayRead, L2TagArrayWrite} {
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ ib_invBothClusters;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(Es, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} {
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ ib_invBothClusters;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+  // THESE SHOULD NOT BE INSTANTANEOUS, BUT ARE FOR NOW
+ transition(Es, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+ a0_allocateL1D;
+ i1_invCluster;
+ s0_storeDone; // instantaneous L1/L2 dirty - no writethrough delay
+ mruD0_setD0cacheMRU;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(Es, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+ a1_allocateL1D;
+ i0_invCluster;
+ s1_storeDone;
+ mruD1_setD1cacheMRU;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition(E0, C0_Load_L1miss, E0_F) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(E0, C1_Load_L1miss, E0_Es) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(E0, Ifetch0_L1miss, S0) {L2TagArrayRead, L1ITagArrayRead} {
+ l2m_profileMiss; // permissions miss, still issue RdBlkS
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ i0_invCluster;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(E0, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead} {
+ l2m_profileMiss; // permissions miss, still issue RdBlkS
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ i0_invCluster;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(E0, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+ a0_allocateL1D;
+ s0_storeDone;
+ mruD0_setD0cacheMRU;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition(E0, C1_Store_L1miss, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ i0_invCluster;
+ s1_storeDone;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, C1_Load_L1miss, E1_F) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, C0_Load_L1miss, E1_Es) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+    l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead} {
+ l2m_profileMiss; // permissions miss, still issue RdBlkS
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ i1_invCluster;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, Ifetch0_L1miss, S0) {L2TagArrayRead, L1ITagArrayRead} {
+ l2m_profileMiss; // permissions miss, still issue RdBlkS
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ i1_invCluster;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+  transition(E1, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+ a1_allocateL1D;
+ s1_storeDone;
+ mruD1_setD1cacheMRU;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, C0_Store_L1miss, M0) {L1D0TagArrayRead, L2TagArrayRead, L2TagArrayWrite, L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ i1_invCluster;
+ s0_storeDone;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition({O}, {C0_Store_L1hit, C0_Store_L1miss}, O_M0) {L1D0TagArrayRead, L2TagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkM
+ l10m_profileMiss;
+ a0_allocateL1D;
+ mruD0_setD0cacheMRU;
+ i1_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition({O}, {C1_Store_L1hit, C1_Store_L1miss}, O_M1) {L1D1TagArrayRead, L2TagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkM
+ l11m_profileMiss;
+ a1_allocateL1D;
+ mruD1_setD1cacheMRU;
+ i0_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition(O, C0_Load_L1miss, O_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(O, C1_Load_L1miss, O_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(Ms, C0_Load_L1miss, Ms_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(Ms, C1_Load_L1miss, Ms_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition({Ms, M0, M1, O}, Ifetch0_L1miss, MO_S0) {L1ITagArrayRead, L2DataArrayRead, L2TagArrayRead} {
+ l2m_profileMiss; // permissions miss
+ l1im_profileMiss;
+ ai_allocateL1I;
+ t_allocateTBE;
+ ib_invBothClusters;
+ vd_victim;
+// i2_invL2;
+ p_popMandatoryQueue;
+ }
+
+ transition({Ms, M0, M1, O}, Ifetch1_L1miss, MO_S1) {L1ITagArrayRead, L2TagArrayRead, L2DataArrayRead } {
+ l2m_profileMiss; // permissions miss
+ l1im_profileMiss;
+ ai_allocateL1I;
+ t_allocateTBE;
+ ib_invBothClusters;
+ vd_victim;
+// i2_invL2;
+ p_popMandatoryQueue;
+ }
+
+ transition(Ms, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+ a0_allocateL1D;
+ i1_invCluster;
+ s0_storeDone;
+ mruD0_setD0cacheMRU;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(Ms, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+ a1_allocateL1D;
+ i0_invCluster;
+ s1_storeDone;
+ mruD1_setD1cacheMRU;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(M0, C0_Load_L1miss, M0_F) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition(M0, C1_Load_L1miss, M0_Ms) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition(M0, {C0_Store_L1hit, C0_Store_L1miss}) {L1D0TagArrayRead, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayRead} {
+ a0_allocateL1D;
+ s0_storeDone;
+ mruD0_setD0cacheMRU;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition(M0, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2DataArrayWrite, L2TagArrayRead, L2TagArrayWrite} {
+ a1_allocateL1D;
+ i0_invCluster;
+ s1_storeDone;
+ mruD1_setD1cacheMRU;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(M1, C0_Load_L1miss, M1_Ms) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition(M1, C1_Load_L1miss, M1_F) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ a1_allocateL1D;
+ f1_L2ToL1;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(M1, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+ a0_allocateL1D;
+ i1_invCluster;
+ s0_storeDone;
+ mruD0_setD0cacheMRU;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition(M1, {C1_Store_L1hit, C1_Store_L1miss}) {L1D1TagArrayRead, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite} {
+ a1_allocateL1D;
+ s1_storeDone;
+ mruD1_setD1cacheMRU;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ // end transitions from base
+
+ // Begin simple hit transitions
+ transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es,
+ Ms_F1, M0_Ms}, C0_Load_L1hit) {L1D0TagArrayRead, L1D0DataArrayRead} {
+ // track hits, if implemented
+ l0_loadDone;
+ mruD0_setD0cacheMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es,
+ Ms_F0, M1_Ms}, C1_Load_L1hit) {L1D1TagArrayRead, L1D1DataArrayRead} {
+ // track hits, if implemented
+ l1_loadDone;
+ mruD1_setD1cacheMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition({S, S_C, S_F0, S_F1, S_F}, Ifetch0_L1hit) {L1ITagArrayRead, L1IDataArrayRead} {
+ // track hits, if implemented
+ il0_loadDone;
+ mruI_setIcacheMRU;
+ p_popMandatoryQueue;
+ }
+
+  transition({S, S_C, S_F0, S_F1, S_F}, Ifetch1_L1hit) {L1ITagArrayRead, L1IDataArrayRead} {
+ // track hits, if implemented
+ il1_loadDone;
+ mruI_setIcacheMRU;
+ p_popMandatoryQueue;
+ }
+
+ // end simple hit transitions
+
+ // Transitions from transient states
+
+ // recycles
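+  // Requests that arrive while a transaction is in flight are requeued
+  // (recycled) and retried until the transient state resolves.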
+ transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+ IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F,
+ E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, C0_Load_L1hit) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M1,
+ O_M1, S0, S1, I_C, S0_C, S1_C, S_C}, C0_Load_L1miss) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+ IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F,
+ E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, C1_Load_L1hit) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M0,
+ O_M0, S0, S1, I_C, S0_C, S1_C, S_C}, C1_Load_L1miss) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, {Ifetch0_L1hit, Ifetch1_L1hit}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M0, I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES,
+ IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, ES_I, MO_I, S_F0, S_F1, S_F,
+ O_F0, O_F1, O_F, S_M0, S_M1, O_M0, O_M1, Es_F0, Es_F1, Es_F, E0_F,
+ E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_F, M1_Ms, I_C,
+ S_C}, {Ifetch0_L1miss, Ifetch1_L1miss}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_E1S, IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, S_F1, O_F1,
+ Si_F0, Si_F1, S_M1, O_M1, S0, S1, Es_F1, E1_F, E0_Es, Ms_F1, M0_Ms,
+ M1_F, I_C, S0_C, S1_C, S_C}, {C0_Store_L1miss}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+  transition({I_E0S, IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, S_F0, O_F0,
+ Si_F0, Si_F1, S_M0, O_M0, S0, S1, Es_F0, E0_F, E1_Es, Ms_F0, M0_F,
+ M1_Ms, I_C, S0_C, S1_C, S_C}, {C1_Store_L1miss}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+             IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1,
+             S_M0, O_M0, Es_F0, Es_F1, Es_F, E0_F, E0_Es, E1_Es, Ms_F0,
+             Ms_F1, Ms_F, M0_F, M0_Ms, M1_Ms}, {C0_Store_L1hit}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+ IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M1,
+ O_M1, Es_F0, Es_F1, Es_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F,
+ M0_Ms, M1_F, M1_Ms}, {C1_Store_L1hit}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+ IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F,
+ E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, L1D0_Repl) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+ IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F,
+ E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, L1D1_Repl) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, L1I_Repl) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+  transition({S_C, S0_C, S1_C, S0, S1, Si_F0, Si_F1, I_M0, I_M1, I_M0M1,
+              I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES, S_F0, S_F1, S_F,
+              O_F0, O_F1, O_F, S_M0, O_M0, S_M1, O_M1, Es_F0, Es_F1, Es_F,
+              E0_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms,
+              M1_F, M1_Ms, MO_S0, MO_S1, IF_E0S, IF_E1S, IF_ES, IF0_ES,
+              IF1_ES, F_S0, F_S1}, L2_Repl) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, {NB_AckS,
+ PrbInvData, PrbInv, PrbShrData}) {} {
+    yy_recycleProbeQueue; // these resolve soon; recycling here avoids adding more transient states
+ }
+
+ transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES}, NB_AckE) {} {
+    xx_recycleResponseQueue; // same reasoning as above
+ }
+
+ transition({E0_Es, E1_F, Es_F1}, C0_Load_L1miss, Es_F) {L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(S_F1, C0_Load_L1miss, S_F) {L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(O_F1, C0_Load_L1miss, O_F) {L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition({Ms_F1, M0_Ms, M1_F}, C0_Load_L1miss, Ms_F) {L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_M0, C1_Load_L1miss, I_M0Ms) {} {
+ l2m_profileMiss;
+ l11m_profileMiss;
+ a1_allocateL1D;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_M1, C0_Load_L1miss, I_M1Ms) {} {
+ l2m_profileMiss;
+ l10m_profileMiss;
+ a0_allocateL1D;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_M0, C1_Store_L1miss, I_M0M1) {} {
+ l2m_profileMiss;
+ l11m_profileMiss;
+ a1_allocateL1D;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_M1, C0_Store_L1miss, I_M1M0) {} {
+ l2m_profileMiss;
+ l10m_profileMiss;
+ a0_allocateL1D;
+ mru_setMRU;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_E0S, C1_Load_L1miss, I_ES) {} {
+ l2m_profileMiss;
+ l11m_profileMiss;
+ a1_allocateL1D;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_E1S, C0_Load_L1miss, I_ES) {} {
+ l2m_profileMiss;
+ l10m_profileMiss;
+ a0_allocateL1D;
+ p_popMandatoryQueue;
+ }
+
+ transition({E1_Es, E0_F, Es_F0}, C1_Load_L1miss, Es_F) {L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(S_F0, C1_Load_L1miss, S_F) {L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(O_F0, C1_Load_L1miss, O_F) {L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition({Ms_F0, M1_Ms, M0_F}, C1_Load_L1miss, Ms_F) { L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es, Ms_F1, M0_Ms}, L1D0_Repl) {L1D0TagArrayRead} {
+ i0_invCluster;
+ }
+
+ transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es, Ms_F0, M1_Ms}, L1D1_Repl) {L1D1TagArrayRead} {
+ i1_invCluster;
+ }
+
+ transition({S, S_C, S_F0, S_F1}, L1I_Repl) {L1ITagArrayRead} {
+ ii_invIcache;
+ }
+
+ transition({S, E0, E1, Es}, L2_Repl, ES_I) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead, L1D1TagArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ t_allocateTBE;
+ vc_victim;
+ ib_invBothClusters;
+ i2_invL2;
+ ii_invIcache;
+ }
+
+ transition({Ms, M0, M1, O}, L2_Repl, MO_I) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead, L1D1TagArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ t_allocateTBE;
+ vd_victim;
+ i2_invL2;
+ ib_invBothClusters; // nothing will happen for D0 on M1, vice versa
+ }
+
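+  // Directory responses complete pending requests: NB_AckS grants Shared,
+  // NB_AckE Exclusive, NB_AckM Modified permission, and NB_AckWB
+  // acknowledges a victim writeback.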
+ transition(S0, NB_AckS, S) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ wi_writeIcache;
+ xi0_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(S1, NB_AckS, S) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ wi_writeIcache;
+ xi1_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+  transition(S0_C, NB_AckS, S_C) {L1D0DataArrayWrite, L2DataArrayWrite} {
+ wi_writeIcache;
+ xi0_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(S1_C, NB_AckS, S_C) {L1D1DataArrayWrite, L2DataArrayWrite} {
+ wi_writeIcache;
+ xi1_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+  transition(I_M0, NB_AckM, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w0_writeDcache;
+ xs0_storeDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w1_writeDcache;
+ xs1_storeDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+  // These M0->M1 transfers should not be instantaneous, but are for now.
+ transition(I_M0M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w0_writeDcache;
+ xs0_storeDone;
+ uu_sendUnblock;
+ i0_invCluster;
+ s1_storeDone;
+ pr_popResponseQueue;
+ }
+
+ transition(I_M1M0, NB_AckM, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w1_writeDcache;
+ xs1_storeDone;
+ uu_sendUnblock;
+ i1_invCluster;
+ s0_storeDone;
+ pr_popResponseQueue;
+ }
+
+  // The above should be more like these, which have some latency to transfer to L1
+  transition(I_M0Ms, NB_AckM, M0_Ms) {L1D0DataArrayWrite, L2DataArrayWrite} {
+ w0_writeDcache;
+ xs0_storeDone;
+ uu_sendUnblock;
+ f1_L2ToL1;
+ pr_popResponseQueue;
+ }
+
+ transition(I_M1Ms, NB_AckM, M1_Ms) {L1D1DataArrayWrite, L2DataArrayWrite} {
+ w1_writeDcache;
+ xs1_storeDone;
+ uu_sendUnblock;
+ f0_L2ToL1;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E0S, NB_AckE, E0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w0_writeDcache;
+ xl0_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E1S, NB_AckE, E1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w1_writeDcache;
+ xl1_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_ES, NB_AckE, Es) {L1D1DataArrayWrite, L1D1TagArrayWrite, L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite } {
+ w0_writeDcache;
+ xl0_loadDone;
+ w1_writeDcache;
+ xl1_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+  transition(I_E0S, NB_AckS, S) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w0_writeDcache;
+ xl0_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E1S, NB_AckS, S) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} {
+ w1_writeDcache;
+ xl1_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_ES, NB_AckS, S) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} {
+ w0_writeDcache;
+ xl0_loadDone;
+ w1_writeDcache;
+ xl1_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(S_F0, L2_to_L1D0, S) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(S_F1, L2_to_L1D1, S) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Si_F0, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ ci_copyL2ToL1;
+ mru_setMRU;
+ il0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Si_F1, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ ci_copyL2ToL1;
+ mru_setMRU;
+ il1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(S_F, L2_to_L1D0, S_F1) { L1D0DataArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(S_F, L2_to_L1D1, S_F0) { L1D1DataArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(O_F0, L2_to_L1D0, O) { L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(O_F1, L2_to_L1D1, O) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(O_F, L2_to_L1D0, O_F1) { L1D0DataArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(O_F, L2_to_L1D1, O_F0) { L1D1DataArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(M1_F, L2_to_L1D1, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(M0_F, L2_to_L1D0, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Ms_F0, L2_to_L1D0, Ms) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Ms_F1, L2_to_L1D1, Ms) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Ms_F, L2_to_L1D0, Ms_F1) {L1D0DataArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+  transition(Ms_F, L2_to_L1D1, Ms_F0) {L1D1DataArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(M1_Ms, L2_to_L1D0, Ms) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(M0_Ms, L2_to_L1D1, Ms) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Es_F0, L2_to_L1D0, Es) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Es_F1, L2_to_L1D1, Es) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Es_F, L2_to_L1D0, Es_F1) {L2TagArrayRead, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Es_F, L2_to_L1D1, Es_F0) {L2TagArrayRead, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(E0_F, L2_to_L1D0, E0) {L2TagArrayRead, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(E1_F, L2_to_L1D1, E1) {L2TagArrayRead, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(E1_Es, L2_to_L1D0, Es) {L2TagArrayRead, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ mru_setMRU;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(E0_Es, L2_to_L1D1, Es) {L2TagArrayRead, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ mru_setMRU;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(IF_E0S, L2_to_L1D0, I_E0S) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(IF_E1S, L2_to_L1D1, I_E1S) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(IF_ES, L2_to_L1D0, IF1_ES) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(IF_ES, L2_to_L1D1, IF0_ES) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(IF0_ES, L2_to_L1D0, I_ES) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(IF1_ES, L2_to_L1D1, I_ES) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(F_S0, L2_to_L1I, S0) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(F_S1, L2_to_L1I, S1) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition({S_M0, O_M0}, NB_AckM, M0) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ mru_setMRU;
+ xs0_storeDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition({S_M1, O_M1}, NB_AckM, M1) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ mru_setMRU;
+ xs1_storeDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(MO_I, NB_AckWB, I) {L2TagArrayWrite} {
+ wb_data;
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(ES_I, NB_AckWB, I) {L2TagArrayWrite} {
+ wb_data;
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(MO_S0, NB_AckWB, S0) {L2TagArrayWrite} {
+ wb_data;
+ i2_invL2;
+ a2_allocateL2;
+    d_deallocateTBE; // writeback acked; TBE no longer needed
+ nS_issueRdBlkS;
+ pr_popResponseQueue;
+ }
+
+ transition(MO_S1, NB_AckWB, S1) {L2TagArrayWrite} {
+ wb_data;
+ i2_invL2;
+ a2_allocateL2;
+    d_deallocateTBE; // writeback acked; TBE no longer needed
+ nS_issueRdBlkS;
+ pr_popResponseQueue;
+ }
+
+ // Writeback cancel "ack"
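+  // The *_C states hold blocks whose pending writeback was overtaken by a
+  // probe; on NB_AckWB the stale notification tells the directory that no
+  // usable data is coming.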
+ transition(I_C, NB_AckWB, I) {L2TagArrayWrite} {
+ ss_sendStaleNotification;
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(S0_C, NB_AckWB, S0) {L2TagArrayWrite} {
+ ss_sendStaleNotification;
+ pr_popResponseQueue;
+ }
+
+ transition(S1_C, NB_AckWB, S1) {L2TagArrayWrite} {
+ ss_sendStaleNotification;
+ pr_popResponseQueue;
+ }
+
+ transition(S_C, NB_AckWB, S) {L2TagArrayWrite} {
+ ss_sendStaleNotification;
+ pr_popResponseQueue;
+ }
+
+ // Begin Probe Transitions
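+  // PrbInvData: invalidate and return dirty data. PrbInv: invalidate with
+  // no data needed. PrbShrData: downgrade to shared, returning data only
+  // if the block is dirty.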
+
+ transition({Ms, M0, M1, O}, PrbInvData, I) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pd_sendProbeResponseData;
+ i2_invL2;
+ ib_invBothClusters;
+ pp_popProbeQueue;
+ }
+
+ transition({Es, E0, E1, S, I}, PrbInvData, I) {L2TagArrayRead, L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2;
+ ib_invBothClusters;
+ ii_invIcache; // only relevant for S
+ pp_popProbeQueue;
+ }
+
+ transition(S_C, PrbInvData, I_C) {L2TagArrayWrite} {
+ t_allocateTBE;
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, PrbInvData, I_C) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms, M0, M1, O, Es, E0, E1, S, I}, PrbInv, I) {L2TagArrayRead, L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2; // nothing will happen in I
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(S_C, PrbInv, I_C) {L2TagArrayWrite} {
+ t_allocateTBE;
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, PrbInv, I_C) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms, M0, M1, O}, PrbShrData, O) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({Es, E0, E1, S}, PrbShrData, S) {L2TagArrayRead, L2TagArrayWrite} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition(S_C, PrbShrData) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition({I, I_C}, PrbShrData) {L2TagArrayRead} {
+ pb_sendProbeResponseBackprobe;
+ pp_popProbeQueue;
+ }
+
+ transition({I_M0, I_E0S}, {PrbInv, PrbInvData}) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters; // must invalidate current data (only relevant for I_M0)
+ a0_allocateL1D; // but make sure there is room for incoming data when it arrives
+ pp_popProbeQueue;
+ }
+
+ transition({I_M1, I_E1S}, {PrbInv, PrbInvData}) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters; // must invalidate current data (only relevant for I_M1)
+ a1_allocateL1D; // but make sure there is room for incoming data when it arrives
+ pp_popProbeQueue;
+ }
+
+ transition({I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_ES}, {PrbInv, PrbInvData, PrbShrData}) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ a0_allocateL1D;
+ a1_allocateL1D;
+ pp_popProbeQueue;
+ }
+
+ transition({I_M0, I_E0S, I_M1, I_E1S}, PrbShrData) {} {
+ pb_sendProbeResponseBackprobe;
+ pp_popProbeQueue;
+ }
+
+ transition(ES_I, PrbInvData, I_C) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbInvData, I_C) {} {
+ pdt_sendProbeResponseDataFromTBE;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbInv, I_C) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(ES_I, PrbInv, I_C) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(ES_I, PrbShrData, ES_I) {} {
+ ph_sendProbeResponseHit;
+ s_setSharedFlip;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbShrData, MO_I) {} {
+ pdt_sendProbeResponseDataFromTBE;
+ s_setSharedFlip;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_S0, PrbInvData, S0_C) {L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pdt_sendProbeResponseDataFromTBE;
+ i2_invL2;
+ a2_allocateL2;
+ d_deallocateTBE;
+ nS_issueRdBlkS;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_S1, PrbInvData, S1_C) {L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pdt_sendProbeResponseDataFromTBE;
+ i2_invL2;
+ a2_allocateL2;
+ d_deallocateTBE;
+ nS_issueRdBlkS;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_S0, PrbInv, S0_C) {L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2;
+ a2_allocateL2;
+ d_deallocateTBE;
+ nS_issueRdBlkS;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_S1, PrbInv, S1_C) {L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2;
+ a2_allocateL2;
+ d_deallocateTBE;
+ nS_issueRdBlkS;
+ pp_popProbeQueue;
+ }
+
+ transition({MO_S0, MO_S1}, PrbShrData) {} {
+ pdt_sendProbeResponseDataFromTBE;
+ s_setSharedFlip;
+ pp_popProbeQueue;
+ }
+
+  transition({S_F0, Es_F0, E0_F, E1_Es}, {PrbInvData, PrbInv}, IF_E0S) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ // invalidate everything you've got
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ // but make sure you have room for what you need from the fill
+ a0_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({S_F1, Es_F1, E1_F, E0_Es}, {PrbInvData, PrbInv}, IF_E1S) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ // invalidate everything you've got
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ // but make sure you have room for what you need from the fill
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({S_F, Es_F}, {PrbInvData, PrbInv}, IF_ES) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ // invalidate everything you've got
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ // but make sure you have room for what you need from the fill
+ a0_allocateL1D;
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition(Si_F0, {PrbInvData, PrbInv}, F_S0) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ nS_issueRdBlkS;
+ pp_popProbeQueue;
+ }
+
+ transition(Si_F1, {PrbInvData, PrbInv}, F_S1) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ nS_issueRdBlkS;
+ pp_popProbeQueue;
+ }
+
+ transition({Es_F0, E0_F, E1_Es}, PrbShrData, S_F0) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition({Es_F1, E1_F, E0_Es}, PrbShrData, S_F1) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition(Es_F, PrbShrData, S_F) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition({S_F0, S_F1, S_F, Si_F0, Si_F1}, PrbShrData) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition(S_M0, PrbInvData, I_M0) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pim_sendProbeResponseInvMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a0_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition(O_M0, PrbInvData, I_M0) {L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pdm_sendProbeResponseDataMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a0_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition({S_M0, O_M0}, {PrbInv}, I_M0) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pim_sendProbeResponseInvMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a0_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition(S_M1, PrbInvData, I_M1) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pim_sendProbeResponseInvMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a1_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+  transition(O_M1, PrbInvData, I_M1) {L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pdm_sendProbeResponseDataMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a1_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition({S_M1, O_M1}, {PrbInv}, I_M1) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pim_sendProbeResponseInvMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a1_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition({S0, S0_C}, {PrbInvData, PrbInv}) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition({S1, S1_C}, {PrbInvData, PrbInv}) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition({S_M0, S_M1}, PrbShrData) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition({O_M0, O_M1}, PrbShrData) {L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({S0, S1, S0_C, S1_C}, PrbShrData) {} {
+ pb_sendProbeResponseBackprobe;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F0, M0_F, M1_Ms, O_F0}, PrbInvData, IF_E0S) { L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pd_sendProbeResponseData;
+ ib_invBothClusters;
+ i2_invL2;
+ a0_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F1, M1_F, M0_Ms, O_F1}, PrbInvData, IF_E1S) {L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pd_sendProbeResponseData;
+ ib_invBothClusters;
+ i2_invL2;
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F, O_F}, PrbInvData, IF_ES) {L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pd_sendProbeResponseData;
+ ib_invBothClusters;
+ i2_invL2;
+ a0_allocateL1D;
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F0, M0_F, M1_Ms, O_F0}, PrbInv, IF_E0S) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ i2_invL2;
+ a0_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F1, M1_F, M0_Ms, O_F1}, PrbInv, IF_E1S) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ i2_invL2;
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F, O_F}, PrbInv, IF_ES) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ i2_invL2;
+ a0_allocateL1D;
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F0, M0_F, M1_Ms}, PrbShrData, O_F0) {L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+  transition({Ms_F1, M1_F, M0_Ms}, PrbShrData, O_F1) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+ transition({Ms_F}, PrbShrData, O_F) {L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({O_F0, O_F1, O_F}, PrbShrData) {L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ // END TRANSITIONS
+}
+
+
diff --git a/src/mem/protocol/MOESI_AMD_Base-L3cache.sm b/src/mem/protocol/MOESI_AMD_Base-L3cache.sm
new file mode 100644
index 000000000..479cf4e78
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-L3cache.sm
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:L3Cache, "L3")
+ : CacheMemory * L3cache;
+ WireBuffer * reqToDir;
+ WireBuffer * respToDir;
+ WireBuffer * l3UnblockToDir;
+ WireBuffer * reqToL3;
+ WireBuffer * probeToL3;
+ WireBuffer * respToL3;
+ Cycles l3_request_latency := 1;
+ Cycles l3_response_latency := 35;
+
+ // To the general response network
+ MessageBuffer * responseFromL3, network="To", virtual_network="2", ordered="false", vnet_type="response";
+
+ // From the general response network
+ MessageBuffer * responseToL3, network="From", virtual_network="2", ordered="false", vnet_type="response";
+
+{
+ // EVENTS
+ enumeration(Event, desc="L3 Events") {
+ // Requests coming from the Cores
+ RdBlk, desc="CPU RdBlk event";
+ RdBlkM, desc="CPU RdBlkM event";
+ RdBlkS, desc="CPU RdBlkS event";
+ CtoD, desc="Change to Dirty request";
+ WrVicBlk, desc="L2 Victim (dirty)";
+ WrVicBlkShared, desc="L2 Victim (dirty), shared";
+ ClVicBlk, desc="L2 Victim (clean)";
+ ClVicBlkShared, desc="L2 Victim (clean), shared";
+
+ CPUData, desc="WB data from CPU";
+ CPUDataShared, desc="WB data from CPU, NBReqShared 1";
+ StaleWB, desc="WB stale; no data";
+
+ L3_Repl, desc="L3 Replacement";
+
+ // Probes
+ PrbInvData, desc="Invalidating probe, return dirty data";
+ PrbInv, desc="Invalidating probe, no need to return data";
+ PrbShrData, desc="Downgrading probe, return data";
+
+ // Coming from Memory Controller
+ WBAck, desc="ack from memory";
+
+ CancelWB, desc="Cancel WB from L2";
+ }
+
+ // STATES
+ // Base States:
+ state_declaration(State, desc="L3 State", default="L3Cache_State_I") {
+ M, AccessPermission:Read_Write, desc="Modified"; // No other cache has copy, memory stale
+ O, AccessPermission:Read_Only, desc="Owned"; // Correct most recent copy, others may exist in S
+ E, AccessPermission:Read_Write, desc="Exclusive"; // Correct, most recent, and only copy (and == Memory)
+ S, AccessPermission:Read_Only, desc="Shared"; // Correct, most recent. If no one in O, then == Memory
+ I, AccessPermission:Invalid, desc="Invalid";
+
+ I_M, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data";
+ I_O, AccessPermission:Busy, desc="Invalid, received WrVicBlkShared, sent Ack, waiting for Data";
+ I_E, AccessPermission:Busy, desc="Invalid, received ClVicBlk, sent Ack, waiting for Data";
+ I_S, AccessPermission:Busy, desc="Invalid, received ClVicBlkShared, sent Ack, waiting for Data";
+ S_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to M";
+ S_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O";
+ S_E, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to E";
+ S_S, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to S";
+ E_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to M";
+ E_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O";
+ E_E, AccessPermission:Busy, desc="received ClVicBlk, sent Ack, waiting for Data, then go to E";
+ E_S, AccessPermission:Busy, desc="received ClVicBlkShared, sent Ack, waiting for Data, then go to S";
+ O_M, AccessPermission:Busy, desc="Owned, received WrVicBlk, sent Ack, waiting for Data";
+ O_O, AccessPermission:Busy, desc="Owned, received WrVicBlkShared, sent Ack, waiting for Data";
+ O_E, AccessPermission:Busy, desc="Owned, received ClVicBlk, sent Ack, waiting for Data";
+ O_S, AccessPermission:Busy, desc="Owned, received ClVicBlkShared, sent Ack, waiting for Data";
+ M_M, AccessPermission:Busy, desc="Modified, received WrVicBlk, sent Ack, waiting for Data";
+ M_O, AccessPermission:Busy, desc="Modified, received WrVicBlkShared, sent Ack, waiting for Data";
+ M_E, AccessPermission:Busy, desc="Modified, received ClVicBlk, sent Ack, waiting for Data";
+ M_S, AccessPermission:Busy, desc="Modified, received ClVicBlkShared, sent Ack, waiting for Data";
+ D_I, AccessPermission:Invalid, desc="drop WB data on the floor when received";
+ MOD_I, AccessPermission:Busy, desc="drop WB data on the floor, waiting for WBAck from Mem";
+ MO_I, AccessPermission:Busy, desc="M or O, received L3_Repl, waiting for WBAck from Mem";
+ I_I, AccessPermission:Busy, desc="I_M or I_O received L3_Repl";
+ I_CD, AccessPermission:Busy, desc="I_I received WBAck, now just waiting for CPUData";
+ I_C, AccessPermission:Invalid, desc="sent cancel, just waiting to receive mem wb ack so nothing gets confused";
+ }
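+
+ // How to read the transient-state names: X_Y means "was in X, acked a
+ // victim writeback, will land in Y once the data arrives". A minimal
+ // trace through the transitions below, for a dirty L2 victim hitting an
+ // invalid L3 line:
+ //   I --WrVicBlk--> I_M (w_sendResponseWBAck) --CPUData--> M (d_writeData)
+ // If the writer cancels instead, CancelWB collapses the state back to
+ // the left half of the name (e.g. I_M --CancelWB--> I).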
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ DataArrayRead, desc="Read the data array";
+ DataArrayWrite, desc="Write the data array";
+ TagArrayRead, desc="Read the tag array";
+ TagArrayWrite, desc="Write the tag array";
+ }
+
+ // STRUCTURES
+
+ structure(Entry, desc="...", interface="AbstractCacheEntry") {
+ State CacheState, desc="cache state";
+ bool Dirty, desc="Is the data dirty (different from memory)?";
+ DataBlock DataBlk, desc="Data for the block";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block";
+ bool Dirty, desc="Is the data dirty?";
+ bool Shared, desc="Victim hit by shared probe";
+ MachineID From, desc="Waiting for writeback from...";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<L3Cache_TBE>", constructor="m_number_of_TBEs";
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+
+
+ // FUNCTION DEFINITIONS
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+ return static_cast(Entry, "pointer", L3cache.lookup(addr));
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ return getCacheEntry(addr).DataBlk;
+ }
+
+ bool presentOrAvail(Addr addr) {
+ return L3cache.isTagPresent(addr) || L3cache.cacheAvail(addr);
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if (is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+ }
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes +
+ functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return L3Cache_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return L3Cache_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(L3Cache_State_to_permission(state));
+ }
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
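+ // Left empty: this L3 does not record per-array access statistics.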
+
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
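+ // Array-port contention is not modeled for this L3: report every
+ // request type as available so transitions never stall on resources.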
+ return true;
+ }
+
+
+ // OUT PORTS
+ out_port(requestNetwork_out, CPURequestMsg, reqToDir);
+ out_port(L3Resp_out, ResponseMsg, respToDir);
+ out_port(responseNetwork_out, ResponseMsg, responseFromL3);
+ out_port(unblockNetwork_out, UnblockMsg, l3UnblockToDir);
+
+ // IN PORTS
+ in_port(NBResponse_in, ResponseMsg, respToL3) {
+ if (NBResponse_in.isReady(clockEdge())) {
+ peek(NBResponse_in, ResponseMsg) {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+ trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ error("Error on NBResponse Type");
+ }
+ }
+ }
+ }
+
+ // Response Network
+ in_port(responseNetwork_in, ResponseMsg, responseToL3) {
+ if (responseNetwork_in.isReady(clockEdge())) {
+ peek(responseNetwork_in, ResponseMsg) {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:CPUData) {
+ if (in_msg.NbReqShared) {
+ trigger(Event:CPUDataShared, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:CPUData, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
+ trigger(Event:StaleWB, in_msg.addr, cache_entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ error("Error on NBResponse Type");
+ }
+ }
+ }
+ }
+
+ // probe network
+ in_port(probeNetwork_in, NBProbeRequestMsg, probeToL3) {
+ if (probeNetwork_in.isReady(clockEdge())) {
+ peek(probeNetwork_in, NBProbeRequestMsg) {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ if (in_msg.Type == ProbeRequestType:PrbInv) {
+ if (in_msg.ReturnData) {
+ trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+ if (in_msg.ReturnData) {
+ trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Don't think I should get any of these");
+ }
+ }
+ }
+ }
+ }
+
+ // Request Network
+ in_port(requestNetwork_in, CPURequestMsg, reqToL3) {
+ if (requestNetwork_in.isReady(clockEdge())) {
+ peek(requestNetwork_in, CPURequestMsg) {
+ assert(in_msg.Destination.isElement(machineID));
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ if (in_msg.Type == CoherenceRequestType:RdBlk) {
+ trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+ trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+ trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+ if (presentOrAvail(in_msg.addr)) {
+ if (in_msg.Shared) {
+ trigger(Event:ClVicBlkShared, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:ClVicBlk, in_msg.addr, cache_entry, tbe);
+ }
+ } else {
+ Addr victim := L3cache.cacheProbe(in_msg.addr);
+ trigger(Event:L3_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+ if (presentOrAvail(in_msg.addr)) {
+ if (in_msg.Shared) {
+ trigger(Event:WrVicBlkShared, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
+ }
+ } else {
+ Addr victim := L3cache.cacheProbe(in_msg.addr);
+ trigger(Event:L3_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else if (in_msg.Type == CoherenceRequestType:WrCancel) {
+ if (is_valid(tbe) && tbe.From == in_msg.Requestor) {
+ trigger(Event:CancelWB, in_msg.addr, cache_entry, tbe);
+ } else {
+ requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+ }
+ }
+ }
+ }
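+
+ // Replacement path illustrated: a VicClean/VicDirty for address A that is
+ // neither present nor allocatable triggers L3_Repl on the victim V named
+ // by L3cache.cacheProbe(A). None of the L3_Repl transitions pop the
+ // request queue, so A's message stays at the head and is re-evaluated
+ // once V has been evicted (or written back via MO_I).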
+
+ // BEGIN ACTIONS
+
+ action(i_invL3, "i", desc="invalidate L3 cache block") {
+ if (is_valid(cache_entry)) {
+ L3cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ action(rm_sendResponseM, "rm", desc="send Modified response") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, l3_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.State := CoherenceState:Modified;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(rs_sendResponseS, "rs", desc="send Shared response") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, l3_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.State := CoherenceState:Shared;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+
+ action(r_requestToMem, "r", desc="Miss in L3, pass on") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(requestNetwork_out, CPURequestMsg, l3_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Shared := false; // unneeded for this request
+ out_msg.MessageSize := in_msg.MessageSize;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ if (is_valid(cache_entry)) {
+ tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
+ tbe.Dirty := cache_entry.Dirty;
+ }
+ tbe.From := machineID;
+ }
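+
+ // tbe.From doubles as a sentinel: t_allocateTBE seeds it with this L3's
+ // own machineID (meaning no writeback is outstanding), f_setFrom points
+ // it at the requestor whose CPUData is expected, and rf_resetFrom
+ // restores the sentinel. The WrCancel check in requestNetwork_in only
+ // fires when the canceller matches tbe.From; otherwise the message is
+ // recycled.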
+
+ action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(vd_vicDirty, "vd", desc="Victimize dirty L3 data") {
+ enqueue(requestNetwork_out, CPURequestMsg, l3_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:VicDirty;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ }
+
+ action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, l3_response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysWBAck;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+ enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(ph_sendProbeResponseHit, "ph", desc="send probe ack, no data") {
+ enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.Dirty := false;
+ out_msg.Hit := true;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pm_sendProbeResponseMiss, "pm", desc="send probe ack, no data") {
+ enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+ enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.DataBlk := cache_entry.DataBlk;
+ assert(cache_entry.Dirty);
+ out_msg.Dirty := true;
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ }
+ }
+
+ action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") {
+ enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.DataBlk := tbe.DataBlk;
+ assert(tbe.Dirty);
+ out_msg.Dirty := true;
+ out_msg.Hit := true;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.State := CoherenceState:NA;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
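+
+ // pd_* sources probe data from the live cache entry, while pdt_* sources
+ // it from the TBE. The TBE variant covers windows such as MO_I, where
+ // the block has already been victimized out of the array but its dirty
+ // data must still answer PrbInvData/PrbShrData.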
+
+ action(mc_cancelMemWriteback, "mc", desc="send writeback cancel to memory") {
+ enqueue(requestNetwork_out, CPURequestMsg, l3_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:WrCancel;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ }
+
+ action(a_allocateBlock, "a", desc="allocate L3 block") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(L3cache.allocate(address, new Entry));
+ }
+ }
+
+ action(d_writeData, "d", desc="write data to L3") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (in_msg.Dirty) {
+ cache_entry.Dirty := in_msg.Dirty;
+ }
+ cache_entry.DataBlk := in_msg.DataBlk;
+ DPRINTF(RubySlicc, "Writing to L3: %s\n", in_msg);
+ }
+ }
+
+ action(rd_copyDataFromRequest, "rd", desc="copy data from request to L3") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := true;
+ }
+ }
+
+ action(f_setFrom, "f", desc="set who WB is expected to come from") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ tbe.From := in_msg.Requestor;
+ }
+ }
+
+ action(rf_resetFrom, "rf", desc="reset From") {
+ tbe.From := machineID;
+ }
+
+ action(wb_data, "wb", desc="write back data") {
+ enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUData;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Shared) {
+ out_msg.NbReqShared := true;
+ } else {
+ out_msg.NbReqShared := false;
+ }
+ out_msg.State := CoherenceState:Shared; // faux info
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(wt_writeDataToTBE, "wt", desc="write WB data to TBE") {
+ peek(responseNetwork_in, ResponseMsg) {
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.Dirty := in_msg.Dirty;
+ }
+ }
+
+ action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+ enqueue(unblockNetwork_out, UnblockMsg, l3_request_latency) {
+ out_msg.addr := address;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+ L3cache.setMRU(address);
+ }
+
+ action(p_popRequestQueue, "p", desc="pop request queue") {
+ requestNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="pop response queue") {
+ responseNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pn_popNBResponseQueue, "pn", desc="pop NB response queue") {
+ NBResponse_in.dequeue(clockEdge());
+ }
+
+ action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+ probeNetwork_in.dequeue(clockEdge());
+ }
+
+ action(zz_recycleRequestQueue, "\z", desc="recycle request queue") {
+ requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+
+ // END ACTIONS
+
+ // BEGIN TRANSITIONS
+
+ // transitions from base
+
+ transition({I, I_C}, {RdBlk, RdBlkS, RdBlkM, CtoD}) {TagArrayRead} {
+ r_requestToMem;
+ p_popRequestQueue;
+ }
+
+ transition(O, RdBlk) {TagArrayRead, DataArrayRead} {
+ rs_sendResponseS;
+ ut_updateTag;
+ p_popRequestQueue;
+ }
+
+ transition(M, RdBlk, O) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+ rs_sendResponseS;
+ ut_updateTag;
+ p_popRequestQueue;
+ }
+
+ transition(S, RdBlk) {TagArrayRead, DataArrayRead} {
+ rs_sendResponseS;
+ ut_updateTag;
+ p_popRequestQueue;
+ }
+
+ transition(E, RdBlk, S) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+ rs_sendResponseS;
+ ut_updateTag;
+ p_popRequestQueue;
+ }
+
+ transition({M, O}, RdBlkS, O) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+ rs_sendResponseS;
+ ut_updateTag;
+ p_popRequestQueue;
+ }
+
+ transition({E, S}, RdBlkS, S) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+ rs_sendResponseS;
+ ut_updateTag;
+ p_popRequestQueue;
+ }
+
+ transition(M, RdBlkM, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+ rm_sendResponseM;
+ i_invL3;
+ p_popRequestQueue;
+ }
+
+ transition({O, S}, {RdBlkM, CtoD}) {TagArrayRead} {
+ r_requestToMem; // can't handle this, just forward
+ p_popRequestQueue;
+ }
+
+ transition(E, RdBlkM, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+ rm_sendResponseM;
+ i_invL3;
+ p_popRequestQueue;
+ }
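+
+ // RdBlkM is only satisfied locally from M or E, where this L3 holds the
+ // sole copy and can hand over ownership (rm_sendResponseM + i_invL3).
+ // From O or S other sharers may exist, so the request is forwarded to
+ // the directory, which is responsible for invalidating them.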
+
+ transition({I}, WrVicBlk, I_M) {TagArrayRead, TagArrayWrite} {
+ a_allocateBlock;
+ t_allocateTBE;
+ f_setFrom;
+// rd_copyDataFromRequest;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(I_C, {WrVicBlk, WrVicBlkShared, ClVicBlk, ClVicBlkShared}) {} {
+ zz_recycleRequestQueue;
+ }
+
+ transition({I}, WrVicBlkShared, I_O) {TagArrayRead, TagArrayWrite} {
+ a_allocateBlock;
+ t_allocateTBE;
+ f_setFrom;
+// rd_copyDataFromRequest;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(S, WrVicBlkShared, S_O) {TagArrayRead, TagArrayWrite} {
+// rd_copyDataFromRequest;
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(S, WrVicBlk, S_M) {TagArrayRead, TagArrayWrite} { // technically should not be possible, but assume the data comes back with the shared bit flipped
+// rd_copyDataFromRequest;
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(E, WrVicBlk, E_M) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(E, WrVicBlkShared, E_O) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(O, WrVicBlk, O_M) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(O, WrVicBlkShared, O_O) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(M, WrVicBlk, M_M) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(M, WrVicBlkShared, M_O) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition({I}, ClVicBlk, I_E) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ a_allocateBlock;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition({I}, ClVicBlkShared, I_S) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ a_allocateBlock;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(S, ClVicBlk, S_E) {TagArrayRead, TagArrayWrite} { // technically impossible, assume data comes back with shared bit flipped
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(S, ClVicBlkShared, S_S) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(E, ClVicBlk, E_E) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(E, ClVicBlkShared, E_S) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(O, ClVicBlk, O_E) {TagArrayRead, TagArrayWrite} { // technically impossible, but assume data comes back with shared bit flipped
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(O, ClVicBlkShared, O_S) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(M, ClVicBlk, M_E) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(M, ClVicBlkShared, M_S) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition({MO_I}, {RdBlk, RdBlkS, RdBlkM, CtoD}) {} {
+ r_requestToMem;
+ p_popRequestQueue;
+ }
+
+ transition(MO_I, {WrVicBlkShared, WrVicBlk, ClVicBlk, ClVicBlkShared}, MOD_I) {TagArrayWrite} {
+ f_setFrom;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(I_M, CPUData, M) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(I_M, CPUDataShared, O) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(I_O, {CPUData, CPUDataShared}, O) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E, CPUData, E) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E, CPUDataShared, S) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(I_S, {CPUData, CPUDataShared}, S) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ pr_popResponseQueue;
+ }
+
+ transition(S_M, CPUDataShared, O) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition(S_O, {CPUData, CPUDataShared}, O) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition(S_E, CPUDataShared, S) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition(S_S, {CPUData, CPUDataShared}, S) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition(O_E, CPUDataShared, O) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition(O_S, {CPUData, CPUDataShared}, O) {DataArrayWrite, TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ d_writeData;
+ ut_updateTag; // update tag on writeback hits.
+ pr_popResponseQueue;
+ }
+
+ transition({D_I}, {CPUData, CPUDataShared}, I) {TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(MOD_I, {CPUData, CPUDataShared}, MO_I) {TagArrayWrite} {
+ uu_sendUnblock;
+ rf_resetFrom;
+ pr_popResponseQueue;
+ }
+
+ transition(I_I, {CPUData, CPUDataShared}, MO_I) {TagArrayWrite, DataArrayRead} {
+ uu_sendUnblock;
+ wt_writeDataToTBE;
+ rf_resetFrom;
+ pr_popResponseQueue;
+ }
+
+ transition(I_CD, {CPUData, CPUDataShared}, I) {DataArrayRead, TagArrayWrite} {
+ uu_sendUnblock;
+ wt_writeDataToTBE;
+ wb_data;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition({M, O}, L3_Repl, MO_I) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ vd_vicDirty;
+ i_invL3;
+ }
+
+ transition({E, S}, L3_Repl, I) {TagArrayRead, TagArrayWrite} {
+ i_invL3;
+ }
+
+ transition({I_M, I_O, S_M, S_O, E_M, E_O}, L3_Repl) {} {
+ zz_recycleRequestQueue;
+ }
+
+ transition({O_M, O_O, O_E, O_S, M_M, M_O, M_E, M_S}, L3_Repl) {} {
+ zz_recycleRequestQueue;
+ }
+
+ transition({I_E, I_S, S_E, S_S, E_E, E_S}, L3_Repl) {} {
+ zz_recycleRequestQueue;
+ }
+
+ transition({M, O}, PrbInvData, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+ pd_sendProbeResponseData;
+ i_invL3;
+ pp_popProbeQueue;
+ }
+
+ transition({E, S, I}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ i_invL3; // nothing will happen in I
+ pp_popProbeQueue;
+ }
+
+ transition({M, O, E, S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ i_invL3; // nothing will happen in I
+ pp_popProbeQueue;
+ }
+
+ transition({M, O}, PrbShrData, O) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({E, S}, PrbShrData, S) {TagArrayRead, TagArrayWrite} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition(I, PrbShrData) {TagArrayRead} {
+ pm_sendProbeResponseMiss;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbInvData, I_C) {TagArrayWrite, DataArrayRead} {
+ pdt_sendProbeResponseDataFromTBE;
+ mc_cancelMemWriteback;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbInv, I_C) {TagArrayWrite} {
+ pi_sendProbeResponseInv;
+ mc_cancelMemWriteback;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbShrData) {DataArrayRead} {
+ pdt_sendProbeResponseDataFromTBE;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, {PrbInvData, PrbInv}) {} {
+ pi_sendProbeResponseInv;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, PrbShrData) {} {
+ pm_sendProbeResponseMiss;
+ pp_popProbeQueue;
+ }
+
+ transition(I_I, {WBAck}, I_CD) {TagArrayWrite} {
+ pn_popNBResponseQueue;
+ }
+
+ transition(MOD_I, WBAck, D_I) {DataArrayRead} {
+ wb_data;
+ pn_popNBResponseQueue;
+ }
+
+ transition(MO_I, WBAck, I) {DataArrayRead, TagArrayWrite} {
+ wb_data;
+ dt_deallocateTBE;
+ pn_popNBResponseQueue;
+ }
+
+ transition(I_C, {WBAck}, I) {TagArrayWrite} {
+ dt_deallocateTBE;
+ pn_popNBResponseQueue;
+ }
+
+ transition({I_M, I_O, I_E, I_S}, CancelWB, I) {TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ i_invL3;
+ p_popRequestQueue;
+ }
+
+ transition({S_S, S_O, S_M, S_E}, CancelWB, S) {TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition({E_M, E_O, E_E, E_S}, CancelWB, E) {TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition({O_M, O_O, O_E, O_S}, CancelWB, O) {TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition({M_M, M_O, M_E, M_S}, CancelWB, M) {TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition(D_I, CancelWB, I) {TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition(MOD_I, CancelWB, MO_I) {TagArrayWrite} {
+ uu_sendUnblock;
+ rf_resetFrom;
+ p_popRequestQueue;
+ }
+
+ transition(I_I, CancelWB, I_C) {TagArrayWrite} {
+ uu_sendUnblock;
+ rf_resetFrom;
+ mc_cancelMemWriteback;
+ p_popRequestQueue;
+ }
+
+ transition(I_CD, CancelWB, I) {TagArrayWrite} {
+ uu_sendUnblock;
+ dt_deallocateTBE;
+ mc_cancelMemWriteback;
+ p_popRequestQueue;
+ }
+
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm b/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm
new file mode 100644
index 000000000..fd84447a2
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm
@@ -0,0 +1,3009 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:CorePair, "CP-like Core Coherence")
+ : Sequencer * sequencer;
+ Sequencer * sequencer1;
+ CacheMemory * L1Icache;
+ CacheMemory * L1D0cache;
+ CacheMemory * L1D1cache;
+ CacheMemory * L2cache;
+ int regionBufferNum;
+ bool send_evictions := "False";
+ Cycles issue_latency := 5;
+ Cycles l2_hit_latency := 18;
+
+ // BEGIN Core Buffers
+
+ // To the Network
+ MessageBuffer * requestFromCore, network="To", virtual_network="0", ordered="true", vnet_type="request";
+ MessageBuffer * responseFromCore, network="To", virtual_network="2", ordered="false", vnet_type="response";
+ MessageBuffer * unblockFromCore, network="To", virtual_network="4", ordered="false", vnet_type="unblock";
+
+ // From the Network
+ MessageBuffer * probeToCore, network="From", virtual_network="0", ordered="false", vnet_type="request";
+ MessageBuffer * responseToCore, network="From", virtual_network="2", ordered="false", vnet_type="response";
+
+ MessageBuffer * mandatoryQueue, ordered="false";
+ MessageBuffer * triggerQueue, ordered="true";
+
+ // END Core Buffers
+
+{
+ // BEGIN STATES
+ state_declaration(State, desc="Cache states", default="CorePair_State_I") {
+
+ I, AccessPermission:Invalid, desc="Invalid";
+ S, AccessPermission:Read_Only, desc="Shared";
+ E0, AccessPermission:Read_Write, desc="Exclusive with Cluster 0 ownership";
+ E1, AccessPermission:Read_Write, desc="Exclusive with Cluster 1 ownership";
+ Es, AccessPermission:Read_Write, desc="Exclusive in core";
+ O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
+ Ms, AccessPermission:Read_Write, desc="Modified in core, both clusters may be sharing line";
+ M0, AccessPermission:Read_Write, desc="Modified with cluster ownership";
+ M1, AccessPermission:Read_Write, desc="Modified with cluster ownership";
+
+ // Transient States
+ I_M0, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+ I_M1, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+ I_M0M1, AccessPermission:Busy, desc="Was in I_M0, got a store request from other cluster as well";
+ I_M1M0, AccessPermission:Busy, desc="Was in I_M1, got a store request from other cluster as well";
+ I_M0Ms, AccessPermission:Busy, desc="Was in I_M0, got a load request from other cluster as well";
+ I_M1Ms, AccessPermission:Busy, desc="Was in I_M1, got a load request from other cluster as well";
+ I_E0S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+ I_E1S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+ I_ES, AccessPermission:Busy, desc="S_F got hit by invalidating probe, RdBlk response needs to go to both clusters";
+
+ IF_E0S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E0S but expecting an L2_to_L1D0 trigger, just drop it when received";
+ IF_E1S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E1S but expecting an L2_to_L1D1 trigger, just drop it when received";
+ IF_ES, AccessPermission:Busy, desc="same, but waiting for two fills";
+ IF0_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one";
+ IF1_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one";
+ F_S0, AccessPermission:Busy, desc="same, but going to S0 when trigger received";
+ F_S1, AccessPermission:Busy, desc="same, but going to S1 when trigger received";
+
+ ES_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for clean writeback ack";
+ MO_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for dirty writeback ack";
+ MO_S0, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS";
+ MO_S1, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS";
+ S_F0, AccessPermission:Read_Only, desc="Shared, filling L1";
+ S_F1, AccessPermission:Read_Only, desc="Shared, filling L1";
+ S_F, AccessPermission:Read_Only, desc="Shared, filling L1";
+ O_F0, AccessPermission:Read_Only, desc="Owned, filling L1";
+ O_F1, AccessPermission:Read_Only, desc="Owned, filling L1";
+ O_F, AccessPermission:Read_Only, desc="Owned, filling L1";
+ Si_F0, AccessPermission:Read_Only, desc="Shared, filling icache";
+ Si_F1, AccessPermission:Read_Only, desc="Shared, filling icache";
+ S_M0, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+ S_M1, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+ O_M0, AccessPermission:Read_Only, desc="Owned, issued CtoD, have not seen response yet";
+ O_M1, AccessPermission:Read_Only, desc="Owned, issued CtoD, have not seen response yet";
+ S0, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 0, waiting for response";
+ S1, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 1, waiting for response";
+
+ Es_F0, AccessPermission:Read_Write, desc="Es, Cluster read, filling";
+ Es_F1, AccessPermission:Read_Write, desc="Es, Cluster read, filling";
+ Es_F, AccessPermission:Read_Write, desc="Es, other cluster read, filling";
+ E0_F, AccessPermission:Read_Write, desc="E0, cluster read, filling";
+ E1_F, AccessPermission:Read_Write, desc="E1, cluster read, filling";
+ E0_Es, AccessPermission:Read_Write, desc="E0, other cluster read, filling, to Es";
+ E1_Es, AccessPermission:Read_Write, desc="E1, other cluster read, filling, to Es";
+ Ms_F0, AccessPermission:Read_Write, desc="Ms, cluster 0 read, filling";
+ Ms_F1, AccessPermission:Read_Write, desc="Ms, cluster 1 read, filling";
+ Ms_F, AccessPermission:Read_Write, desc="Ms, other cluster read, filling";
+ M0_F, AccessPermission:Read_Write, desc="M0, cluster read, filling";
+ M0_Ms, AccessPermission:Read_Write, desc="M0, other cluster read, filling, to Ms";
+ M1_F, AccessPermission:Read_Write, desc="M1, cluster read, filling";
+ M1_Ms, AccessPermission:Read_Write, desc="M1, other cluster read, filling, to Ms";
+
+ I_C, AccessPermission:Invalid, desc="Invalid, but waiting for WBAck from NB from canceled writeback";
+ S0_C, AccessPermission:Busy, desc="MO_S0 hit by invalidating probe, waiting for WBAck from NB for canceled WB";
+ S1_C, AccessPermission:Busy, desc="MO_S1 hit by invalidating probe, waiting for WBAck from NB for canceled WB";
+ S_C, AccessPermission:Busy, desc="S*_C got NB_AckS, still waiting for WBAck";
+
+ } // END STATES
+
+ // BEGIN EVENTS
+ enumeration(Event, desc="CP Events") {
+ // CP Initiated events
+ C0_Load_L1miss, desc="Cluster 0 load, L1 missed";
+ C0_Load_L1hit, desc="Cluster 0 load, L1 hit";
+ C1_Load_L1miss, desc="Cluster 1 load L1 missed";
+ C1_Load_L1hit, desc="Cluster 1 load L1 hit";
+ Ifetch0_L1hit, desc="Instruction fetch, hit in the L1";
+ Ifetch1_L1hit, desc="Instruction fetch, hit in the L1";
+ Ifetch0_L1miss, desc="Instruction fetch, missed in the L1";
+ Ifetch1_L1miss, desc="Instruction fetch, missed in the L1";
+ C0_Store_L1miss, desc="Cluster 0 store missed in L1";
+ C0_Store_L1hit, desc="Cluster 0 store hit in L1";
+ C1_Store_L1miss, desc="Cluster 1 store missed in L1";
+ C1_Store_L1hit, desc="Cluster 1 store hit in L1";
+ // NB Initiated events
+ NB_AckS, desc="NB Ack (Shared) to Core Request";
+ NB_AckM, desc="NB Ack (Modified) to Core Request";
+ NB_AckE, desc="NB Ack (Exclusive) to Core Request";
+
+ NB_AckWB, desc="NB Ack for writeback";
+
+ // Memory System initiatied events
+ L1I_Repl, desc="Replace address from L1I"; // Presumed clean
+ L1D0_Repl, desc="Replace address from L1D0"; // Presumed clean
+ L1D1_Repl, desc="Replace address from L1D1"; // Presumed clean
+ L2_Repl, desc="Replace address from L2";
+
+ L2_to_L1D0, desc="L1 fill from L2";
+ L2_to_L1D1, desc="L1 fill from L2";
+ L2_to_L1I, desc="L1 fill from L2";
+
+ // Probe Events
+ PrbInvData, desc="probe, return O or M data";
+ PrbInvDataDemand, desc="probe, return O or M data. Demand request";
+ PrbInv, desc="probe, no need for data";
+ PrbShrData, desc="probe downgrade, return O or M data";
+ PrbShrDataDemand, desc="probe downgrade, return O or M data. Demand request";
+ ForceRepl, desc="probe from r-buf. Act as though a repl";
+ ForceDowngrade, desc="probe from r-buf. Act as though a downgrade";
+
+ } // END EVENTS
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ L1D0DataArrayRead, desc="Read the data array";
+ L1D0DataArrayWrite, desc="Write the data array";
+ L1D0TagArrayRead, desc="Read the tag array";
+ L1D0TagArrayWrite, desc="Write the tag array";
+ L1D1DataArrayRead, desc="Read the data array";
+ L1D1DataArrayWrite, desc="Write the data array";
+ L1D1TagArrayRead, desc="Read the tag array";
+ L1D1TagArrayWrite, desc="Write the tag array";
+ L1IDataArrayRead, desc="Read the data array";
+ L1IDataArrayWrite, desc="Write the data array";
+ L1ITagArrayRead, desc="Read the tag array";
+ L1ITagArrayWrite, desc="Write the tag array";
+ L2DataArrayRead, desc="Read the data array";
+ L2DataArrayWrite, desc="Write the data array";
+ L2TagArrayRead, desc="Read the tag array";
+ L2TagArrayWrite, desc="Write the tag array";
+ }
+
+
+ // BEGIN STRUCTURE DEFINITIONS
+
+
+ // Cache Entry
+ structure(Entry, desc="...", interface="AbstractCacheEntry") {
+ State CacheState, desc="cache state";
+ bool Dirty, desc="Is the data dirty (different than memory)?";
+ DataBlock DataBlk, desc="data for the block";
+ bool FromL2, default="false", desc="block just moved from L2";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
+ bool Dirty, desc="Is the data dirty (different than memory)?";
+ int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
+ bool Shared, desc="Victim hit by shared probe";
+ bool AckNeeded, desc="True if need to ack r-dir";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<CorePair_TBE>", constructor="m_number_of_TBEs";
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ // END STRUCTURE DEFINITIONS
+
+ // BEGIN INTERNAL FUNCTIONS
+
+ MachineID getPeer(MachineID mach) {
+ return createMachineID(MachineType:RegionBuffer, intToID(regionBufferNum));
+ }
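+
+ // The mach argument is unused: each core pair talks to exactly one
+ // region buffer, chosen statically by the regionBufferNum parameter, so
+ // every peer lookup resolves to that single RegionBuffer machine.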
+
+ bool addressInCore(Addr addr) {
+ return (L2cache.isTagPresent(addr) || L1Icache.isTagPresent(addr) || L1D0cache.isTagPresent(addr) || L1D1cache.isTagPresent(addr));
+ }
+
+ Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+ Entry L2cache_entry := static_cast(Entry, "pointer", L2cache.lookup(address));
+ return L2cache_entry;
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return tbe.DataBlk;
+ } else {
+ return getCacheEntry(addr).DataBlk;
+ }
+ }
+
+ Entry getL1CacheEntry(Addr addr, int cluster), return_by_pointer="yes" {
+ if (cluster == 0) {
+ Entry L1D0_entry := static_cast(Entry, "pointer", L1D0cache.lookup(addr));
+ return L1D0_entry;
+ } else {
+ Entry L1D1_entry := static_cast(Entry, "pointer", L1D1cache.lookup(addr));
+ return L1D1_entry;
+ }
+ }
+
+ Entry getICacheEntry(Addr addr), return_by_pointer="yes" {
+ Entry c_entry := static_cast(Entry, "pointer", L1Icache.lookup(addr));
+ return c_entry;
+ }
+
+ bool presentOrAvail2(Addr addr) {
+ return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+ }
+
+ bool presentOrAvailI(Addr addr) {
+ return L1Icache.isTagPresent(addr) || L1Icache.cacheAvail(addr);
+ }
+
+ bool presentOrAvailD0(Addr addr) {
+ return L1D0cache.isTagPresent(addr) || L1D0cache.cacheAvail(addr);
+ }
+
+ bool presentOrAvailD1(Addr addr) {
+ return L1D1cache.isTagPresent(addr) || L1D1cache.cacheAvail(addr);
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if(is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.CacheState;
+ }
+ return State:I;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+
+ if (is_valid(cache_entry)) {
+ cache_entry.CacheState := state;
+ }
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ return CorePair_State_to_permission(tbe.TBEState);
+ }
+
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return CorePair_State_to_permission(cache_entry.CacheState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ bool isValid(Addr addr) {
+ AccessPermission perm := getAccessPermission(addr);
+ if (perm == AccessPermission:NotPresent ||
+ perm == AccessPermission:Invalid ||
+ perm == AccessPermission:Busy) {
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(CorePair_State_to_permission(state));
+ }
+ }
+
+ MachineType testAndClearLocalHit(Entry cache_entry) {
+ assert(is_valid(cache_entry));
+ if (cache_entry.FromL2) {
+ cache_entry.FromL2 := false;
+ return MachineType:L2Cache;
+ } else {
+ return MachineType:L1Cache;
+ }
+ }
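+
+ // FromL2 makes hit attribution one-shot: the first access to a line just
+ // filled from the L2 is credited to MachineType:L2Cache and the flag is
+ // cleared, so later hits on the same line are credited to the L1,
+ // presumably so hit stats can distinguish L1 hits from L2 fills.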
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:L1D0DataArrayRead) {
+ L1D0cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L1D0DataArrayWrite) {
+ L1D0cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L1D0TagArrayRead) {
+ L1D0cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L1D0TagArrayWrite) {
+ L1D0cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ } else if (request_type == RequestType:L1D1DataArrayRead) {
+ L1D1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L1D1DataArrayWrite) {
+ L1D1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L1D1TagArrayRead) {
+ L1D1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L1D1TagArrayWrite) {
+ L1D1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ } else if (request_type == RequestType:L1IDataArrayRead) {
+ L1Icache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L1IDataArrayWrite) {
+ L1Icache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L1ITagArrayRead) {
+ L1Icache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L1ITagArrayWrite) {
+ L1Icache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ } else if (request_type == RequestType:L2DataArrayRead) {
+ L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L2DataArrayWrite) {
+ L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L2TagArrayRead) {
+ L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L2TagArrayWrite) {
+ L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:L2DataArrayRead) {
+ return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L2DataArrayWrite) {
+ return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L2TagArrayRead) {
+ return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L2TagArrayWrite) {
+ return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1D0DataArrayRead) {
+ return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1D0DataArrayWrite) {
+ return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1D0TagArrayRead) {
+ return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1D0TagArrayWrite) {
+ return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1D1DataArrayRead) {
+ return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1D1DataArrayWrite) {
+ return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1D1TagArrayRead) {
+ return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1D1TagArrayWrite) {
+ return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1IDataArrayRead) {
+ return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1IDataArrayWrite) {
+ return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L1ITagArrayRead) {
+ return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L1ITagArrayWrite) {
+ return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ return true;
+ }
+ }
+
+ // END INTERNAL FUNCTIONS
+
+ // ** OUT_PORTS **
+
+ out_port(requestNetwork_out, CPURequestMsg, requestFromCore);
+ out_port(responseNetwork_out, ResponseMsg, responseFromCore);
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+ out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
+
+ // ** IN_PORTS **
+
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue, block_on="addr") {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == TriggerType:L2_to_L1) {
+ if (in_msg.Dest == CacheId:L1I) {
+ trigger(Event:L2_to_L1I, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Dest == CacheId:L1D0) {
+ trigger(Event:L2_to_L1D0, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Dest == CacheId:L1D1) {
+ trigger(Event:L2_to_L1D1, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("unexpected trigger dest");
+ }
+ }
+ }
+ }
+ }
+
+
+ in_port(probeNetwork_in, NBProbeRequestMsg, probeToCore) {
+ if (probeNetwork_in.isReady(clockEdge())) {
+ peek(probeNetwork_in, NBProbeRequestMsg, block_on="addr") {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == ProbeRequestType:PrbInv) {
+ if (in_msg.DemandRequest) {
+ trigger(Event:PrbInvDataDemand, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.ReturnData) {
+ trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+ if (in_msg.DemandRequest) {
+ trigger(Event:PrbShrDataDemand, in_msg.addr, cache_entry, tbe);
+ } else {
+ assert(in_msg.ReturnData);
+ trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == ProbeRequestType:PrbRepl) {
+ trigger(Event:ForceRepl, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == ProbeRequestType:PrbRegDowngrade) {
+ trigger(Event:ForceDowngrade, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unknown probe request");
+ }
+ }
+ }
+ }
+
+
+ // ResponseNetwork
+ in_port(responseToCore_in, ResponseMsg, responseToCore) {
+ if (responseToCore_in.isReady(clockEdge())) {
+ peek(responseToCore_in, ResponseMsg, block_on="addr") {
+
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+
+ if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+ if (in_msg.State == CoherenceState:Modified) {
+ trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.State == CoherenceState:Shared) {
+ trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.State == CoherenceState:Exclusive) {
+ trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+ trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unexpected Response Message to Core");
+ }
+ }
+ }
+ }
+
+ // Nothing from the Unblock Network
+
+ // Mandatory Queue
+ in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+ if (mandatoryQueue_in.isReady(clockEdge())) {
+ peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+
+ Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+ TBE tbe := TBEs.lookup(in_msg.LineAddress);
+
+ if (in_msg.Type == RubyRequestType:IFETCH) {
+ // FETCH ACCESS
+
+ if (L1Icache.isTagPresent(in_msg.LineAddress)) {
+ if (mod(in_msg.contextId, 2) == 0) {
+ trigger(Event:Ifetch0_L1hit, in_msg.LineAddress, cache_entry, tbe);
+ } else {
+ trigger(Event:Ifetch1_L1hit, in_msg.LineAddress, cache_entry, tbe);
+ }
+ } else {
+ if (presentOrAvail2(in_msg.LineAddress)) {
+ if (presentOrAvailI(in_msg.LineAddress)) {
+ if (mod(in_msg.contextId, 2) == 0) {
+ trigger(Event:Ifetch0_L1miss, in_msg.LineAddress, cache_entry,
+ tbe);
+ } else {
+ trigger(Event:Ifetch1_L1miss, in_msg.LineAddress, cache_entry,
+ tbe);
+ }
+ } else {
+ Addr victim := L1Icache.cacheProbe(in_msg.LineAddress);
+ trigger(Event:L1I_Repl, victim,
+ getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else { // Not present or avail in L2
+ Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+ DPRINTF(RubySlicc, "Victim for %s L2_Repl(0) is %s\n", in_msg.LineAddress, victim);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+ TBEs.lookup(victim));
+ }
+ }
+ } else {
+ // DATA ACCESS
+ if (mod(in_msg.contextId, 2) == 1) {
+ if (L1D1cache.isTagPresent(in_msg.LineAddress)) {
+ if (in_msg.Type == RubyRequestType:LD) {
+ trigger(Event:C1_Load_L1hit, in_msg.LineAddress, cache_entry,
+ tbe);
+ } else {
+ // Stores must write through, make sure L2 avail.
+ if (presentOrAvail2(in_msg.LineAddress)) {
+ trigger(Event:C1_Store_L1hit, in_msg.LineAddress, cache_entry,
+ tbe);
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+ DPRINTF(RubySlicc, "Victim for %s L2_Repl(1) is %s\n", in_msg.LineAddress, victim);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+ TBEs.lookup(victim));
+ }
+ }
+ } else {
+ if (presentOrAvail2(in_msg.LineAddress)) {
+ if (presentOrAvailD1(in_msg.LineAddress)) {
+ if (in_msg.Type == RubyRequestType:LD) {
+ trigger(Event:C1_Load_L1miss, in_msg.LineAddress,
+ cache_entry, tbe);
+ } else {
+ trigger(Event:C1_Store_L1miss, in_msg.LineAddress,
+ cache_entry, tbe);
+ }
+ } else {
+ Addr victim := L1D1cache.cacheProbe(in_msg.LineAddress);
+ DPRINTF(RubySlicc, "Victim for %s L1D1_Repl is %s\n", in_msg.LineAddress, victim);
+ trigger(Event:L1D1_Repl, victim,
+ getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ } else { // not present or avail in L2
+ Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+ DPRINTF(RubySlicc, "Victim for %s L2_Repl(2) is %s\n", in_msg.LineAddress, victim);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+ }
+ }
+ } else {
+ Entry L1D0cache_entry := getL1CacheEntry(in_msg.LineAddress, 0);
+ if (is_valid(L1D0cache_entry)) {
+ if (in_msg.Type == RubyRequestType:LD) {
+ trigger(Event:C0_Load_L1hit, in_msg.LineAddress, cache_entry,
+ tbe);
+ } else {
+ if (presentOrAvail2(in_msg.LineAddress)) {
+ trigger(Event:C0_Store_L1hit, in_msg.LineAddress, cache_entry,
+ tbe);
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+ DPRINTF(RubySlicc, "Victim for %s L2_Repl(3) is %s\n", in_msg.LineAddress, victim);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+ TBEs.lookup(victim));
+ }
+ }
+ } else {
+ if (presentOrAvail2(in_msg.LineAddress)) {
+ if (presentOrAvailD0(in_msg.LineAddress)) {
+ if (in_msg.Type == RubyRequestType:LD) {
+ trigger(Event:C0_Load_L1miss, in_msg.LineAddress,
+ cache_entry, tbe);
+ } else {
+ trigger(Event:C0_Store_L1miss, in_msg.LineAddress,
+ cache_entry, tbe);
+ }
+ } else {
+ Addr victim := L1D0cache.cacheProbe(in_msg.LineAddress);
+ DPRINTF(RubySlicc, "Victim for %s L1D0_Repl is %s\n", in_msg.LineAddress, victim);
+ trigger(Event:L1D0_Repl, victim, getCacheEntry(victim),
+ TBEs.lookup(victim));
+ }
+ } else {
+ Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+ DPRINTF(RubySlicc, "Victim for %s L2_Repl(4) is %s\n", in_msg.LineAddress, victim);
+ trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+ TBEs.lookup(victim));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+
+ // ACTIONS
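+ // Each action below is declared as action(name, "shorthand", desc="...");
+ // the short quoted code is an abbreviation identifying the action in the
+ // generated protocol tables. Inside an action body, address, cache_entry,
+ // and tbe are implicitly bound to the arguments that the triggering
+ // in_port passed to trigger().
+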
+ action(ii_invIcache, "ii", desc="invalidate iCache") {
+ if (L1Icache.isTagPresent(address)) {
+ L1Icache.deallocate(address);
+ }
+ }
+
+ action(i0_invCluster, "i0", desc="invalidate cluster 0") {
+ if (L1D0cache.isTagPresent(address)) {
+ L1D0cache.deallocate(address);
+ }
+ }
+
+ action(i1_invCluster, "i1", desc="invalidate cluster 1") {
+ if (L1D1cache.isTagPresent(address)) {
+ L1D1cache.deallocate(address);
+ }
+ }
+
+ action(ib_invBothClusters, "ib", desc="invalidate both clusters") {
+ if (L1D0cache.isTagPresent(address)) {
+ L1D0cache.deallocate(address);
+ }
+ if (L1D1cache.isTagPresent(address)) {
+ L1D1cache.deallocate(address);
+ }
+ }
+
+ action(i2_invL2, "i2", desc="invalidate L2") {
+ if(is_valid(cache_entry)) {
+ L2cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
+ action(n_issueRdBlk, "n", desc="Issue RdBlk") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlk;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
+
+ action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlkM;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
+
+ action(nMs_issueRdBlkMSinked, "nMs", desc="Issue RdBlkM with CtoDSinked") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlkM;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.CtoDSinked := true;
+ }
+ }
+
+ action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlkS;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := curCycle();
+ }
+ }
+
+ action(nSs_issueRdBlkSSinked, "nSs", desc="Issue RdBlkS with CtoDSinked") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:RdBlkS;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.CtoDSinked := true;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ }
+
+ action(vd_victim, "vd", desc="Victimize M/O L2 Data") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ assert(is_valid(cache_entry));
+ out_msg.DataBlk := cache_entry.DataBlk;
+ assert(cache_entry.Dirty);
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicDirty;
+ out_msg.InitialRequestTime := curCycle();
+ if (cache_entry.CacheState == State:O) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ }
+ }
+
+ action(vc_victim, "vc", desc="Victimize E/S L2 Data") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicClean;
+ out_msg.InitialRequestTime := curCycle();
+ if (cache_entry.CacheState == State:S) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ }
+ }
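+
+ // The two victim flavors above mirror the L2 state being evicted: VicDirty
+ // carries the dirty block for M/O lines, while VicClean is a data-less
+ // notification for E/S lines. In both cases the Shared flag tells the
+ // receiver, roughly, whether other sharers may still exist (O and S
+ // respectively).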
+
+ // Could send these two directly to dir if we made a new out network on channel 0
+ action(vdf_victimForce, "vdf", desc="Victimize M/O L2 Data") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ assert(is_valid(cache_entry));
+ out_msg.DataBlk := cache_entry.DataBlk;
+ assert(cache_entry.Dirty);
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicDirty;
+ out_msg.InitialRequestTime := curCycle();
+ if (cache_entry.CacheState == State:O) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ out_msg.Private := true;
+ }
+ }
+
+ action(vcf_victimForce, "vcf", desc="Victimize E/S L2 Data") {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Type := CoherenceRequestType:VicClean;
+ out_msg.InitialRequestTime := curCycle();
+ if (cache_entry.CacheState == State:S) {
+ out_msg.Shared := true;
+ } else {
+ out_msg.Shared := false;
+ }
+ out_msg.Private := true;
+ }
+ }
+
+ action(a0_allocateL1D, "a0", desc="Allocate L1D0 Block") {
+ if (L1D0cache.isTagPresent(address) == false) {
+ L1D0cache.allocateVoid(address, new Entry);
+ }
+ }
+
+ action(a1_allocateL1D, "a1", desc="Allocate L1D1 Block") {
+ if (L1D1cache.isTagPresent(address) == false) {
+ L1D1cache.allocateVoid(address, new Entry);
+ }
+ }
+
+ action(ai_allocateL1I, "ai", desc="Allocate L1I Block") {
+ if (L1Icache.isTagPresent(address) == false) {
+ L1Icache.allocateVoid(address, new Entry);
+ }
+ }
+
+ action(a2_allocateL2, "a2", desc="Allocate L2 Block") {
+ if (is_invalid(cache_entry)) {
+ set_cache_entry(L2cache.allocate(address, new Entry));
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ assert(is_valid(cache_entry));
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs
+ tbe.Dirty := cache_entry.Dirty;
+ tbe.Shared := false;
+ }
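+
+ // The TBE snapshots DataBlk and Dirty at allocation time so that a
+ // writeback or probe response (wb_data, pdt_sendProbeResponseDataFromTBE)
+ // can still supply the data after the cache entry itself has been
+ // invalidated or reallocated.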
+
+ action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+ mandatoryQueue_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+ responseToCore_in.dequeue(clockEdge());
+ }
+
+ action(pt_popTriggerQueue, "pt", desc="Pop Trigger Queue") {
+ triggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+ probeNetwork_in.dequeue(clockEdge());
+ }
+
+ action(il0_loadDone, "il0", desc="Cluster 0 i load done") {
+ Entry entry := getICacheEntry(address);
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ assert(is_valid(entry));
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer.readCallback(address,
+ l2entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ }
+
+ action(il1_loadDone, "il1", desc="Cluster 1 i load done") {
+ Entry entry := getICacheEntry(address);
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ assert(is_valid(entry));
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer1.readCallback(address,
+ l2entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ }
+
+ action(l0_loadDone, "l0", desc="Cluster 0 load done") {
+ Entry entry := getL1CacheEntry(address, 0);
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ assert(is_valid(entry));
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer.readCallback(address,
+ l2entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ }
+
+ action(l1_loadDone, "l1", desc="Cluster 1 load done") {
+ Entry entry := getL1CacheEntry(address, 1);
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ assert(is_valid(entry));
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ sequencer1.readCallback(address,
+ l2entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ }
+
+ action(xl0_loadDone, "xl0", desc="Cluster 0 load done") {
+ peek(responseToCore_in, ResponseMsg) {
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ DPRINTF(ProtocolTrace, "CP Load Done 0 -- address %s, data: %s\n",
+ address, l2entry.DataBlk);
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ assert(is_valid(l2entry));
+ sequencer.readCallback(address,
+ l2entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ }
+ }
+
+ action(xl1_loadDone, "xl1", desc="Cluster 1 load done") {
+ peek(responseToCore_in, ResponseMsg) {
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ assert(is_valid(l2entry));
+ sequencer1.readCallback(address,
+ l2entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ }
+ }
+
+ action(xi0_loadDone, "xi0", desc="Cluster 0 i-load done") {
+ peek(responseToCore_in, ResponseMsg) {
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ assert(is_valid(l2entry));
+ sequencer.readCallback(address,
+ l2entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ }
+ }
+
+ action(xi1_loadDone, "xi1", desc="Cluster 1 i-load done") {
+ peek(responseToCore_in, ResponseMsg) {
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ Entry l2entry := getCacheEntry(address); // Used for functional accesses
+ // L2 supplies data (functional accesses only look in L2, ok because L1
+ // writes through to L2)
+ assert(is_valid(l2entry));
+ sequencer1.readCallback(address,
+ l2entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ }
+ }
+
+ action(s0_storeDone, "s0", desc="Cluster 0 store done") {
+ Entry entry := getL1CacheEntry(address, 0);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ sequencer.writeCallback(address,
+ cache_entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ cache_entry.Dirty := true;
+ entry.DataBlk := cache_entry.DataBlk;
+ entry.Dirty := true;
+ DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+ }
+
+ action(s1_storeDone, "s1", desc="Cluster 1 store done") {
+ Entry entry := getL1CacheEntry(address, 1);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ sequencer1.writeCallback(address,
+ cache_entry.DataBlk,
+ true,
+ testAndClearLocalHit(entry));
+ cache_entry.Dirty := true;
+ entry.Dirty := true;
+ entry.DataBlk := cache_entry.DataBlk;
+ DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+ }
+
+ action(xs0_storeDone, "xs0", desc="Cluster 0 store done") {
+ peek(responseToCore_in, ResponseMsg) {
+ Entry entry := getL1CacheEntry(address, 0);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ sequencer.writeCallback(address,
+ cache_entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ cache_entry.Dirty := true;
+ entry.Dirty := true;
+ entry.DataBlk := cache_entry.DataBlk;
+ DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+ }
+ }
+
+ action(xs1_storeDone, "xs1", desc="Cluster 1 store done") {
+ peek(responseToCore_in, ResponseMsg) {
+ Entry entry := getL1CacheEntry(address, 1);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+ (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+ sequencer1.writeCallback(address,
+ cache_entry.DataBlk,
+ false,
+ machineIDToMachineType(in_msg.Sender),
+ in_msg.InitialRequestTime,
+ in_msg.ForwardRequestTime,
+ in_msg.ProbeRequestStartTime);
+ cache_entry.Dirty := true;
+ entry.Dirty := true;
+ entry.DataBlk := cache_entry.DataBlk;
+ DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+ }
+ }
+
+ action(forward_eviction_to_cpu0, "fec0", desc="sends eviction information to processor0") {
+ if (send_evictions) {
+ DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
+ sequencer.evictionCallback(address);
+ }
+ }
+
+ action(forward_eviction_to_cpu1, "fec1", desc="sends eviction information to processor1") {
+ if (send_evictions) {
+ DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
+ sequencer1.evictionCallback(address);
+ }
+ }
+
+ action(ci_copyL2ToL1, "ci", desc="copy L2 data to L1") {
+ Entry entry := getICacheEntry(address);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.Dirty := cache_entry.Dirty;
+ entry.DataBlk := cache_entry.DataBlk;
+ entry.FromL2 := true;
+ }
+
+ action(c0_copyL2ToL1, "c0", desc="copy L2 data to L1") {
+ Entry entry := getL1CacheEntry(address, 0);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.Dirty := cache_entry.Dirty;
+ entry.DataBlk := cache_entry.DataBlk;
+ entry.FromL2 := true;
+ }
+
+ action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+ peek(responseToCore_in, ResponseMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:StaleNotif;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(c1_copyL2ToL1, "c1", desc="copy L2 data to L1") {
+ Entry entry := getL1CacheEntry(address, 1);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.Dirty := cache_entry.Dirty;
+ entry.DataBlk := cache_entry.DataBlk;
+ entry.FromL2 := true;
+ }
+
+ action(fi_L2ToL1, "fi", desc="L2 to L1 inst fill") {
+ enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:L2_to_L1;
+ out_msg.Dest := CacheId:L1I;
+ }
+ }
+
+ action(f0_L2ToL1, "f0", desc="L2 to L1 data fill") {
+ enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:L2_to_L1;
+ out_msg.Dest := CacheId:L1D0;
+ }
+ }
+
+ action(f1_L2ToL1, "f1", desc="L2 to L1 data fill") {
+ enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:L2_to_L1;
+ out_msg.Dest := CacheId:L1D1;
+ }
+ }
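+
+ // The three fill actions above do not copy data directly; they enqueue an
+ // L2_to_L1 trigger delayed by l2_hit_latency, and the resulting event runs
+ // the matching copy action (ci/c0/c1), so an L2-to-L1 transfer costs at
+ // least one L2 hit latency.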
+
+ action(wi_writeIcache, "wi", desc="write data to icache (and l2)") {
+ peek(responseToCore_in, ResponseMsg) {
+ Entry entry := getICacheEntry(address);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.DataBlk := in_msg.DataBlk;
+ entry.Dirty := in_msg.Dirty;
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := in_msg.Dirty;
+ }
+ }
+
+ action(w0_writeDcache, "w0", desc="write data to dcache 0 (and l2)") {
+ peek(responseToCore_in, ResponseMsg) {
+ Entry entry := getL1CacheEntry(address, 0);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.DataBlk := in_msg.DataBlk;
+ entry.Dirty := in_msg.Dirty;
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := in_msg.Dirty;
+ }
+ }
+
+ action(w1_writeDcache, "w1", desc="write data to dcache 1 (and l2)") {
+ peek(responseToCore_in, ResponseMsg) {
+ Entry entry := getL1CacheEntry(address, 1);
+ assert(is_valid(entry));
+ assert(is_valid(cache_entry));
+ entry.DataBlk := in_msg.DataBlk;
+ entry.Dirty := in_msg.Dirty;
+ cache_entry.DataBlk := in_msg.DataBlk;
+ cache_entry.Dirty := in_msg.Dirty;
+ }
+ }
+
+ action(wb_data, "wb", desc="write back data") {
+ peek(responseToCore_in, ResponseMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUData;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Shared) {
+ out_msg.NbReqShared := true;
+ } else {
+ out_msg.NbReqShared := false;
+ }
+ out_msg.State := CoherenceState:Shared; // faux info
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.Dirty := false;
+ out_msg.Hit := false;
+ out_msg.Ntsl := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.isValid := isValid(address);
+ }
+ }
+
+ action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.Dirty := false;
+ out_msg.Ntsl := true;
+ out_msg.Hit := false;
+ APPEND_TRANSITION_COMMENT("Setting Ms");
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.isValid := isValid(address);
+ }
+ }
+
+ action(ph_sendProbeResponseHit, "ph", desc="send probe ack PrbShrData, no data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ assert(addressInCore(address) || is_valid(tbe));
+ out_msg.Dirty := false; // Dirty is only set when data is sent back
+ out_msg.Hit := true;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.isValid := isValid(address);
+ }
+ }
+
+ action(pb_sendProbeResponseBackprobe, "pb", desc="send probe ack PrbShrData, no data, check for L1 residence") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ if (addressInCore(address)) {
+ out_msg.Hit := true;
+ } else {
+ out_msg.Hit := false;
+ }
+ out_msg.Dirty := false; // not sending back data, so definitely not dirty
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.isValid := isValid(address);
+ }
+ }
+
+ action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.DataBlk := cache_entry.DataBlk;
+ assert(cache_entry.Dirty);
+ out_msg.Dirty := true;
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.isValid := isValid(address);
+ }
+ }
+
+ action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.DataBlk := cache_entry.DataBlk;
+ assert(cache_entry.Dirty);
+ out_msg.Dirty := true;
+ out_msg.Hit := true;
+ APPEND_TRANSITION_COMMENT("Setting Ms");
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.isValid := isValid(address);
+ }
+ }
+
+ action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ assert(is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.DataBlk := tbe.DataBlk;
+ assert(tbe.Dirty);
+ out_msg.Dirty := true;
+ out_msg.Hit := true;
+ out_msg.State := CoherenceState:NA;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.isValid := isValid(address);
+ }
+ }
+
+ action(ra_sendReplAck, "ra", desc="Send ack to r-buf that line is replaced if needed") {
+ if (is_invalid(tbe) || tbe.AckNeeded) {
+ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:InvAck;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ APPEND_TRANSITION_COMMENT(" Sending ack to r-buf ");
+ } else {
+ APPEND_TRANSITION_COMMENT(" NOT Sending ack to r-buf ");
+ }
+ }
+
+ action(m_markAckNeeded, "m", desc="Mark TBE to send ack when deallocated") {
+ assert(is_valid(tbe));
+ tbe.AckNeeded := true;
+ }
+
+ action(mc_cancelWB, "mc", desc="send writeback cancel to L3") {
+ enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:CPUCancelWB;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(s_setSharedFlip, "s", desc="hit by shared probe, status may be different") {
+ assert(is_valid(tbe));
+ tbe.Shared := true;
+ }
+
+ action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+ enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
+ out_msg.addr := address;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ out_msg.wasValid := isValid(address);
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(sdv_sendDoneValid, "sdv", desc="Request finished, send done ack") {
+ enqueue(unblockNetwork_out, UnblockMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.DoneAck := true;
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ if (is_valid(tbe)) {
+ out_msg.Dirty := tbe.Dirty;
+ } else if (is_valid(cache_entry)) {
+ out_msg.Dirty := cache_entry.Dirty;
+ } else {
+ out_msg.Dirty := false;
+ }
+ out_msg.validToInvalid := false;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(sdi_sendDoneInvalid, "sdi", desc="Request finished, send done ack") {
+ enqueue(unblockNetwork_out, UnblockMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Destination.add(getPeer(machineID));
+ out_msg.DoneAck := true;
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ if (is_valid(tbe)) {
+ out_msg.Dirty := tbe.Dirty;
+ } else if (is_valid(cache_entry)) {
+ out_msg.Dirty := cache_entry.Dirty;
+ } else {
+ out_msg.Dirty := false;
+ }
+ out_msg.validToInvalid := true;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
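+
+ // uu_sendUnblock tells the directory the transition is complete, while the
+ // two done acks above go to this core's peer (the region buffer in the
+ // Region flavor of the protocol) and report whether the line ended up
+ // valid or invalid, and dirty or clean, so the peer can update its state.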
+
+ action(l10m_profileMiss, "l10m", desc="profile L1D0 miss") {
+ ++L1D0cache.demand_misses;
+ }
+
+ action(l11m_profileMiss, "l11m", desc="profile L1D1 miss") {
+ ++L1D1cache.demand_misses;
+ }
+
+ action(l1im_profileMiss, "l1im", desc="profile L1I miss") {
+ ++L1Icache.demand_misses;
+ }
+
+ action(l2m_profileMiss, "l2m", desc="profile L2 miss") {
+ ++L2cache.demand_misses;
+ }
+
+ action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
+ probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
+ mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+ // END ACTIONS
+
+ // BEGIN TRANSITIONS
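+ // A transition is written as:
+ //   transition(start_state, event, end_state) {resources} {
+ //     action; ...
+ //   }
+ // where the optional {resources} list names the tag/data array ports the
+ // transition consumes (for port-contention modeling) and the actions run
+ // in the order listed. Omitting end_state leaves the state unchanged.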
+
+ // transitions from base
+ transition(I, C0_Load_L1miss, I_E0S) {L1D0TagArrayRead, L2TagArrayRead} {
+ // track misses, if implemented
+ // since in I state, L2 miss as well
+ l2m_profileMiss;
+ l10m_profileMiss;
+ a0_allocateL1D;
+ l1im_profileMiss;
+ a2_allocateL2;
+ i1_invCluster;
+ ii_invIcache;
+ n_issueRdBlk;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, C1_Load_L1miss, I_E1S) {L1D1TagArrayRead, L2TagArrayRead} {
+ // track misses, if implemented
+ // since in I state, L2 miss as well
+ l2m_profileMiss;
+ l11m_profileMiss;
+ a1_allocateL1D;
+ a2_allocateL2;
+ i0_invCluster;
+ ii_invIcache;
+ n_issueRdBlk;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, Ifetch0_L1miss, S0) {L1ITagArrayRead, L2TagArrayRead} {
+ // track misses, if implemented
+ // L2 miss as well
+ l10m_profileMiss;
+ l2m_profileMiss;
+ l1im_profileMiss;
+ ai_allocateL1I;
+ a2_allocateL2;
+ ib_invBothClusters;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} {
+ l11m_profileMiss;
+ // track misses, if implemented
+ // L2 miss as well
+ l2m_profileMiss;
+ l1im_profileMiss;
+ ai_allocateL1I;
+ a2_allocateL2;
+ ib_invBothClusters;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(I, C0_Store_L1miss, I_M0) {L1D0TagArrayRead,L2TagArrayRead} {
+ l2m_profileMiss;
+ l10m_profileMiss;
+ a0_allocateL1D;
+ a2_allocateL2;
+ i1_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
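+
+ // Roughly, the store-miss flow started here completes when the NB responds:
+ // I --C0_Store_L1miss--> I_M0 (allocate L1D0/L2, invalidate cluster 1 and
+ // the I-cache, issue RdBlkM), then I_M0 --NB_AckM--> M0 (write the data,
+ // complete the store, send the done ack and unblock); see the NB_AckM
+ // transitions further down.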
+
+ transition(I, C1_Store_L1miss, I_M1) {L1D1TagArrayRead, L2TagArrayRead} {
+ l2m_profileMiss;
+ l11m_profileMiss;
+ a1_allocateL1D;
+ a2_allocateL2;
+ i0_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition(S, C0_Load_L1miss, S_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(S, C1_Load_L1miss, S_F1) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(S, Ifetch0_L1miss, Si_F0) {L1ITagArrayRead,L2TagArrayRead, L2DataArrayRead} {
+ l1im_profileMiss;
+ ai_allocateL1I;
+ fi_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(S, Ifetch1_L1miss, Si_F1) {L1ITagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l1im_profileMiss;
+ ai_allocateL1I;
+ fi_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition({S}, {C0_Store_L1hit, C0_Store_L1miss}, S_M0) {L1D0TagArrayRead, L2TagArrayRead} {
+ l2m_profileMiss;
+ l10m_profileMiss;
+ a0_allocateL1D;
+ i1_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition({S}, {C1_Store_L1hit, C1_Store_L1miss}, S_M1) {L1D1TagArrayRead,L2TagArrayRead} {
+ l2m_profileMiss;
+ l11m_profileMiss;
+ a1_allocateL1D;
+ i0_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+ transition(Es, C0_Load_L1miss, Es_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} { // can this be folded with S_F?
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(Es, C1_Load_L1miss, Es_F1) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} { // can this be folded with S_F?
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(Es, Ifetch0_L1miss, S0) {L1ITagArrayRead, L2TagArrayRead} {
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ ib_invBothClusters;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(Es, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} {
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ ib_invBothClusters;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ // THESE SHOULD NOT BE INSTANTANEOUS, BUT THAT IS ACCEPTABLE FOR NOW
+ transition(Es, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayWrite,L1D0TagArrayRead, L2TagArrayRead, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} {
+ a0_allocateL1D;
+ i1_invCluster;
+ s0_storeDone; // instantaneous L1/L2 dirty - no writethrough delay
+ p_popMandatoryQueue;
+ }
+
+ transition(Es, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} {
+ a1_allocateL1D;
+ i0_invCluster;
+ s1_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(E0, C0_Load_L1miss, E0_F) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(E0, C1_Load_L1miss, E0_Es) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(E0, Ifetch0_L1miss, S0) {L2TagArrayRead, L1ITagArrayRead} {
+ l2m_profileMiss; // permissions miss, still issue RdBlkS
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ i0_invCluster;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(E0, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead } {
+ l2m_profileMiss; // permissions miss, still issue RdBlkS
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ i0_invCluster;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(E0, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+ a0_allocateL1D;
+ s0_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(E0, C1_Store_L1miss, M1) {L1D0TagArrayRead, L1D0TagArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+ a1_allocateL1D;
+ l11m_profileMiss;
+ i0_invCluster;
+ s1_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, C1_Load_L1miss, E1_F) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ a1_allocateL1D;
+ l11m_profileMiss;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, C0_Load_L1miss, E1_Es) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ a0_allocateL1D;
+ l10m_profileMiss;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead} {
+ l2m_profileMiss; // permissions miss, still issue RdBlkS
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ i1_invCluster;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, Ifetch0_L1miss, S0) {L2TagArrayRead,L1ITagArrayRead} {
+ l2m_profileMiss; // permissions miss, still issue RdBlkS
+ l1im_profileMiss;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ i1_invCluster;
+ nS_issueRdBlkS;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+ a1_allocateL1D;
+ s1_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(E1, C0_Store_L1miss, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ i1_invCluster;
+ s0_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition({O}, {C0_Store_L1hit, C0_Store_L1miss}, O_M0) {L1D0TagArrayRead, L2TagArrayRead} {
+ l2m_profileMiss; // permissions miss, still issue CtoD
+ l10m_profileMiss;
+ a0_allocateL1D;
+ i1_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition({O}, {C1_Store_L1hit, C1_Store_L1miss}, O_M1) {L1D1TagArrayRead, L2TagArrayRead} {
+ l2m_profileMiss; // permissions miss, still issue RdBlkS
+ l11m_profileMiss;
+ a1_allocateL1D;
+ i0_invCluster;
+ ii_invIcache;
+ nM_issueRdBlkM;
+ p_popMandatoryQueue;
+ }
+
+ transition(O, C0_Load_L1miss, O_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(O, C1_Load_L1miss, O_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(Ms, C0_Load_L1miss, Ms_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(Ms, C1_Load_L1miss, Ms_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition({Ms, M0, M1, O}, Ifetch0_L1miss, MO_S0) {L1ITagArrayRead, L2TagArrayRead} {
+ l2m_profileMiss; // permissions miss
+ l1im_profileMiss;
+ ai_allocateL1I;
+ t_allocateTBE;
+ ib_invBothClusters;
+ vd_victim;
+ // i2_invL2;
+ p_popMandatoryQueue;
+ }
+
+ transition({Ms, M0, M1, O}, Ifetch1_L1miss, MO_S1) {L1ITagArrayRead, L2TagArrayRead} {
+ l2m_profileMiss; // permissions miss
+ l1im_profileMiss;
+ ai_allocateL1I;
+ t_allocateTBE;
+ ib_invBothClusters;
+ vd_victim;
+ // i2_invL2;
+ p_popMandatoryQueue;
+ }
+
+ transition(Ms, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+ a0_allocateL1D;
+ i1_invCluster;
+ s0_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(Ms, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+ a1_allocateL1D;
+ i0_invCluster;
+ s1_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(M0, C0_Load_L1miss, M0_F) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(M0, C1_Load_L1miss, M0_Ms) {L2TagArrayRead, L2DataArrayRead,L1D1TagArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(M0, {C0_Store_L1hit, C0_Store_L1miss}) {L1D0TagArrayRead, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayRead} {
+ a0_allocateL1D;
+ s0_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(M0, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayRead, L2TagArrayWrite} {
+ a1_allocateL1D;
+ i0_invCluster;
+ s1_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(M1, C0_Load_L1miss, M1_Ms) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(M1, C1_Load_L1miss, M1_F) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(M1, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+ a0_allocateL1D;
+ i1_invCluster;
+ s0_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ transition(M1, {C1_Store_L1hit, C1_Store_L1miss}) {L1D1TagArrayRead, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite} {
+ a1_allocateL1D;
+ s1_storeDone;
+ p_popMandatoryQueue;
+ }
+
+ // end transitions from base
+
+ // Begin simple hit transitions
+ transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es,
+ Ms_F1, M0_Ms}, C0_Load_L1hit) {L1D0TagArrayRead, L1D0DataArrayRead} {
+ // track hits, if implemented
+ l0_loadDone;
+ p_popMandatoryQueue;
+ }
+
+ transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es,
+ Ms_F0, M1_Ms}, C1_Load_L1hit) {L1D1TagArrayRead, L1D1DataArrayRead} {
+ // track hits, if implemented
+ l1_loadDone;
+ p_popMandatoryQueue;
+ }
+
+ transition({S, S_C, S_F0, S_F1, S_F}, Ifetch0_L1hit) {L1ITagArrayRead, L1IDataArrayRead} {
+ // track hits, if implemented
+ il0_loadDone;
+ p_popMandatoryQueue;
+ }
+
+ transition({S, S_C, S_F0, S_F1, S_F}, Ifetch1_L1hit) {L1ITagArrayRead, L1IDataArrayRead} {
+ // track hits, if implemented
+ il1_loadDone;
+ p_popMandatoryQueue;
+ }
+
+ // end simple hit transitions
+
+ // Transitions from transient states
+
+ // recycles
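+ // A recycled message is re-enqueued and retried recycle_latency cycles
+ // later; requests that hit a transient state simply spin here until the
+ // in-flight transition reaches a stable state that can service them.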
+ transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+ IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F,
+ E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, C0_Load_L1hit) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M1,
+ O_M1, S0, S1, I_C, S0_C, S1_C, S_C}, C0_Load_L1miss) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+ IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F,
+ E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, C1_Load_L1hit) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M0,
+ O_M0, S0, S1, I_C, S0_C, S1_C, S_C}, C1_Load_L1miss) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, {Ifetch0_L1hit, Ifetch1_L1hit}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M0, I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES,
+ IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, ES_I, MO_I, S_F0, S_F1, S_F,
+ O_F0, O_F1, O_F, S_M0, S_M1, O_M0, O_M1, Es_F0, Es_F1, Es_F, E0_F,
+ E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_F, M1_Ms, I_C,
+ S_C}, {Ifetch0_L1miss, Ifetch1_L1miss}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_E1S, IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, S_F1, O_F1,
+ Si_F0, Si_F1, S_M1, O_M1, S0, S1, Es_F1, E1_F, E0_Es, Ms_F1, M0_Ms,
+ M1_F, I_C, S0_C, S1_C, S_C}, {C0_Store_L1miss}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_E0S, IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, S_F0, O_F0,
+ Si_F0, Si_F1, S_M0, O_M0, S0, S1, Es_F0, E0_F, E1_Es, Ms_F0, M0_F,
+ M1_Ms, I_C, S0_C, S1_C, S_C}, {C1_Store_L1miss}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+ IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M0,
+ O_M0, Es_F0, Es_F1, Es_F, E0_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F,
+ M0_Ms, M1_Ms}, {C0_Store_L1hit}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+ IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M1,
+ O_M1, Es_F0, Es_F1, Es_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F,
+ M0_Ms, M1_F, M1_Ms}, {C1_Store_L1hit}) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+ IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F,
+ E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, L1D0_Repl) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+ IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F,
+ E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, L1D1_Repl) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, L1I_Repl) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({S_C, S0_C, S1_C, S0, S1, Si_F0, Si_F1, I_M0, I_M1, I_M0M1,
+ I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES, S_F0, S_F1, S_F, O_F0, O_F1,
+ O_F, S_M0, O_M0, S_M1, O_M1, Es_F0, Es_F1, Es_F, E0_F, E1_F, E0_Es, E1_Es,
+ Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_F, M1_Ms, MO_S0, MO_S1, IF_E0S,
+ IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, L2_Repl) {} {
+ zz_recycleMandatoryQueue;
+ }
+
+ transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, {NB_AckS,
+ PrbInvData, PrbInvDataDemand, PrbInv, PrbShrData, PrbShrDataDemand}) {} {
+ zz_recycleMandatoryQueue; // These resolve shortly on their own; handling them immediately would require additional transient states, which does not seem worth the complexity.
+ }
+
+ transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES}, NB_AckE) {} {
+ zz_recycleMandatoryQueue; // These resolve shortly on their own; handling them immediately would require additional transient states, which does not seem worth the complexity.
+ }
+
+ transition({E0_Es, E1_F, Es_F1}, C0_Load_L1miss, Es_F) {L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(S_F1, C0_Load_L1miss, S_F) {L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(O_F1, C0_Load_L1miss, O_F) {L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition({Ms_F1, M0_Ms, M1_F}, C0_Load_L1miss, Ms_F) {L2DataArrayRead} {
+ l10m_profileMiss;
+ a0_allocateL1D;
+ f0_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_M0, C1_Load_L1miss, I_M0Ms){
+ l11m_profileMiss;
+ l2m_profileMiss;
+ a1_allocateL1D;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_M1, C0_Load_L1miss, I_M1Ms){
+ l10m_profileMiss;
+ l2m_profileMiss;
+ a0_allocateL1D;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_M0, C1_Store_L1miss, I_M0M1) {
+ l11m_profileMiss;
+ l2m_profileMiss;
+ a1_allocateL1D;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_M1, C0_Store_L1miss, I_M1M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L2TagArrayRead, L2TagArrayWrite} {
+ l2m_profileMiss;
+ a0_allocateL1D;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_E0S, C1_Load_L1miss, I_ES) {} {
+ l2m_profileMiss;
+ l11m_profileMiss;
+ a1_allocateL1D;
+ p_popMandatoryQueue;
+ }
+
+ transition(I_E1S, C0_Load_L1miss, I_ES) {} {
+ l2m_profileMiss;
+ l10m_profileMiss;
+ a0_allocateL1D;
+ p_popMandatoryQueue;
+ }
+
+ transition({E1_Es, E0_F, Es_F0}, C1_Load_L1miss, Es_F) {L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(S_F0, C1_Load_L1miss, S_F) { L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition(O_F0, C1_Load_L1miss, O_F) {L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition({Ms_F0, M1_Ms, M0_F}, C1_Load_L1miss, Ms_F) {L2DataArrayRead} {
+ l11m_profileMiss;
+ a1_allocateL1D;
+ f1_L2ToL1;
+ p_popMandatoryQueue;
+ }
+
+ transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es, Ms_F1, M0_Ms}, L1D0_Repl) {L1D0TagArrayRead} {
+ i0_invCluster;
+ }
+
+ transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es, Ms_F0, M1_Ms}, L1D1_Repl) {L1D1TagArrayRead} {
+ i1_invCluster;
+ }
+
+ transition({S, S_C, S_F0, S_F1}, L1I_Repl) {L1ITagArrayRead} {
+ ii_invIcache;
+ }
+
+ transition({S, E0, E1, Es}, L2_Repl, ES_I) {L2TagArrayRead,L1D0TagArrayRead, L1D1TagArrayRead, L1ITagArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ t_allocateTBE;
+ vc_victim;
+ ib_invBothClusters;
+ i2_invL2;
+ ii_invIcache;
+ }
+
+ transition({Ms, M0, M1, O}, L2_Repl, MO_I) {L2TagArrayRead, L2TagArrayWrite, L1D0TagArrayRead, L1D1TagArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ t_allocateTBE;
+ vd_victim;
+ i2_invL2;
+ ib_invBothClusters; // a no-op for D0 when in M1, and vice versa
+ }
+
+ transition(S0, NB_AckS, S) {L1IDataArrayWrite, L1ITagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ wi_writeIcache;
+ xi0_loadDone;
+ uu_sendUnblock;
+ sdv_sendDoneValid;
+ pr_popResponseQueue;
+ }
+
+ transition(S1, NB_AckS, S) {L1IDataArrayWrite, L1ITagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ wi_writeIcache;
+ xi1_loadDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(S0_C, NB_AckS, S_C) {L1IDataArrayWrite, L2DataArrayWrite} {
+ // no done ack needed, since the RdBlkS was sunk
+ wi_writeIcache;
+ xi0_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(S1_C, NB_AckS, S_C) {L1IDataArrayWrite, L2DataArrayWrite} {
+ wi_writeIcache;
+ xi1_loadDone;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_M0, NB_AckM, M0) { L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w0_writeDcache;
+ xs0_storeDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} {
+ w1_writeDcache;
+ xs1_storeDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ // These M0->M1 handoffs should not be instantaneous, but that is acceptable for now.
+ transition(I_M0M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} {
+ w0_writeDcache;
+ xs0_storeDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ i0_invCluster;
+ s1_storeDone;
+ pr_popResponseQueue;
+ }
+
+ transition(I_M1M0, NB_AckM, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} {
+ w1_writeDcache;
+ xs1_storeDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ i1_invCluster;
+ s0_storeDone;
+ pr_popResponseQueue;
+ }
+
+ // The above should instead look like this, which models some latency for the transfer to L1.
+ transition(I_M0Ms, NB_AckM, M0_Ms) {L1D0DataArrayWrite,L2DataArrayWrite} {
+ w0_writeDcache;
+ xs0_storeDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ f1_L2ToL1;
+ pr_popResponseQueue;
+ }
+
+ transition(I_M1Ms, NB_AckM, M1_Ms) {L1D1DataArrayWrite,L2DataArrayWrite} {
+ w1_writeDcache;
+ xs1_storeDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ f0_L2ToL1;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E0S, NB_AckE, E0) {L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w0_writeDcache;
+ xl0_loadDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E1S, NB_AckE, E1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w1_writeDcache;
+ xl1_loadDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_ES, NB_AckE, Es) {L1D1DataArrayWrite, L1D1TagArrayWrite, L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite } {
+ w0_writeDcache;
+ xl0_loadDone;
+ w1_writeDcache;
+ xl1_loadDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E0S, NB_AckS, S) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ w0_writeDcache;
+ xl0_loadDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_E1S, NB_AckS, S) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} {
+ w1_writeDcache;
+ xl1_loadDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(I_ES, NB_AckS, S) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} {
+ w0_writeDcache;
+ xl0_loadDone;
+ w1_writeDcache;
+ xl1_loadDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(S_F0, L2_to_L1D0, S) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(S_F1, L2_to_L1D1, S) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Si_F0, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ ci_copyL2ToL1;
+ il0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Si_F1, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ ci_copyL2ToL1;
+ il1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(S_F, L2_to_L1D0, S_F1) { L1D0DataArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(S_F, L2_to_L1D1, S_F0) { L1D1DataArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(O_F0, L2_to_L1D0, O) { L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(O_F1, L2_to_L1D1, O) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(O_F, L2_to_L1D0, O_F1) { L1D0DataArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(O_F, L2_to_L1D1, O_F0) { L1D1DataArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(M1_F, L2_to_L1D1, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(M0_F, L2_to_L1D0, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Ms_F0, L2_to_L1D0, Ms) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Ms_F1, L2_to_L1D1, Ms) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Ms_F, L2_to_L1D0, Ms_F1) {L1D0DataArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Ms_F, L2_to_L1D1, Ms_F0) {L1D1DataArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(M1_Ms, L2_to_L1D0, Ms) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(M0_Ms, L2_to_L1D1, Ms) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Es_F0, L2_to_L1D0, Es) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Es_F1, L2_to_L1D1, Es) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Es_F, L2_to_L1D0, Es_F1) {L2TagArrayRead, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(Es_F, L2_to_L1D1, Es_F0) {L2TagArrayRead, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(E0_F, L2_to_L1D0, E0) {L2TagArrayRead, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(E1_F, L2_to_L1D1, E1) {L2TagArrayRead, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(E1_Es, L2_to_L1D0, Es) {L2TagArrayRead, L2DataArrayRead} {
+ c0_copyL2ToL1;
+ l0_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(E0_Es, L2_to_L1D1, Es) {L2TagArrayRead, L2DataArrayRead} {
+ c1_copyL2ToL1;
+ l1_loadDone;
+ pt_popTriggerQueue;
+ }
+
+ transition(IF_E0S, L2_to_L1D0, I_E0S) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(IF_E1S, L2_to_L1D1, I_E1S) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(IF_ES, L2_to_L1D0, IF1_ES) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(IF_ES, L2_to_L1D1, IF0_ES) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(IF0_ES, L2_to_L1D0, I_ES) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(IF1_ES, L2_to_L1D1, I_ES) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(F_S0, L2_to_L1I, S0) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(F_S1, L2_to_L1I, S1) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition({S_M0, O_M0}, NB_AckM, M0) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ xs0_storeDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition({S_M1, O_M1}, NB_AckM, M1) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+ xs1_storeDone;
+ sdv_sendDoneValid;
+ uu_sendUnblock;
+ pr_popResponseQueue;
+ }
+
+ transition(MO_I, NB_AckWB, I) {L2TagArrayWrite} {
+ wb_data;
+ ra_sendReplAck;
+ sdi_sendDoneInvalid;
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(ES_I, NB_AckWB, I) {L2TagArrayWrite} {
+ wb_data;
+ ra_sendReplAck;
+ sdi_sendDoneInvalid;
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(MO_S0, NB_AckWB, S0) {L2TagArrayWrite} {
+ wb_data;
+ i2_invL2;
+ a2_allocateL2;
+ sdv_sendDoneValid;
+ nS_issueRdBlkS;
+ d_deallocateTBE; // FOO
+ pr_popResponseQueue;
+ }
+
+ transition(MO_S1, NB_AckWB, S1) {L2TagArrayWrite} {
+ wb_data;
+ i2_invL2;
+ a2_allocateL2;
+ sdv_sendDoneValid;
+ nS_issueRdBlkS;
+ d_deallocateTBE; // FOO
+ pr_popResponseQueue;
+ }
+
+ // Writeback cancel "ack"
+ transition(I_C, NB_AckWB, I) {L2TagArrayWrite} {
+ ss_sendStaleNotification;
+ sdi_sendDoneInvalid;
+ d_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(S0_C, NB_AckWB, S0) {L2TagArrayWrite} {
+ ss_sendStaleNotification;
+ sdv_sendDoneValid;
+ pr_popResponseQueue;
+ }
+
+ transition(S1_C, NB_AckWB, S1) {L2TagArrayWrite} {
+ ss_sendStaleNotification;
+ sdv_sendDoneValid;
+ pr_popResponseQueue;
+ }
+
+ transition(S_C, NB_AckWB, S) {L2TagArrayWrite} {
+ ss_sendStaleNotification;
+ sdv_sendDoneValid;
+ pr_popResponseQueue;
+ }
+
+ // Begin Probe Transitions
+
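+ // Probe responses come in a few flavors (see the actions above): pi/pim
+ // ack an invalidation without data, pd/pdm return dirty data, pdt returns
+ // data from the TBE for lines already being written back, ph reports a hit
+ // without data, and pb reports whether any L1 still holds the line.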
+ transition({Ms, M0, M1, O}, {PrbInvData, PrbInvDataDemand}, I) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pd_sendProbeResponseData;
+ i2_invL2;
+ ib_invBothClusters;
+ pp_popProbeQueue;
+ }
+
+ transition({Es, E0, E1, S, I}, {PrbInvData, PrbInvDataDemand}, I) {L2TagArrayRead, L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2;
+ ib_invBothClusters;
+ ii_invIcache; // only relevant for S
+ pp_popProbeQueue;
+ }
+
+ transition(S_C, {PrbInvData, PrbInvDataDemand}, I_C) {L2TagArrayWrite} {
+ t_allocateTBE;
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, {PrbInvData, PrbInvDataDemand}, I_C) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms, M0, M1, O, Es, E0, E1, S, I}, PrbInv, I) {L2TagArrayRead, L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2; // nothing will happen in I
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(S_C, PrbInv, I_C) {L2TagArrayWrite} {
+ t_allocateTBE;
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(I_C, PrbInv, I_C) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms, M0, M1, O}, {PrbShrData, PrbShrDataDemand}, O) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({Es, E0, E1, S}, {PrbShrData, PrbShrDataDemand}, S) {L2TagArrayRead, L2TagArrayWrite} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition(S_C, {PrbShrData, PrbShrDataDemand}) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition({I, I_C}, {PrbShrData, PrbShrDataDemand}) {L2TagArrayRead} {
+ pb_sendProbeResponseBackprobe;
+ pp_popProbeQueue;
+ }
+
+ transition({I_M0, I_E0S}, {PrbInv, PrbInvData, PrbInvDataDemand}) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters; // must invalidate current data (only relevant for I_M0)
+ a0_allocateL1D; // but make sure there is room for incoming data when it arrives
+ pp_popProbeQueue;
+ }
+
+ transition({I_M1, I_E1S}, {PrbInv, PrbInvData, PrbInvDataDemand}) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters; // must invalidate current data (only relevant for I_M1)
+ a1_allocateL1D; // but make sure there is room for incoming data when it arrives
+ pp_popProbeQueue;
+ }
+
+ transition({I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_ES}, {PrbInv, PrbInvData, PrbInvDataDemand, PrbShrData, PrbShrDataDemand}) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ a0_allocateL1D;
+ a1_allocateL1D;
+ pp_popProbeQueue;
+ }
+
+ transition({I_M0, I_E0S, I_M1, I_E1S}, {PrbShrData, PrbShrDataDemand}) {} {
+ pb_sendProbeResponseBackprobe;
+ pp_popProbeQueue;
+ }
+
+ transition(ES_I, {PrbInvData, PrbInvDataDemand}, I_C) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, {PrbInvData, PrbInvDataDemand}, I_C) {} {
+ pdt_sendProbeResponseDataFromTBE;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, PrbInv, I_C) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(ES_I, PrbInv, I_C) {} {
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ pp_popProbeQueue;
+ }
+
+ transition(ES_I, {PrbShrData, PrbShrDataDemand}, ES_I) {} {
+ ph_sendProbeResponseHit;
+ s_setSharedFlip;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_I, {PrbShrData, PrbShrDataDemand}, MO_I) {} {
+ pdt_sendProbeResponseDataFromTBE;
+ s_setSharedFlip;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_S0, {PrbInvData, PrbInvDataDemand}, S0_C) {L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pdt_sendProbeResponseDataFromTBE;
+ i2_invL2;
+ a2_allocateL2;
+ nS_issueRdBlkS;
+ d_deallocateTBE;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_S1, {PrbInvData, PrbInvDataDemand}, S1_C) {L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pdt_sendProbeResponseDataFromTBE;
+ i2_invL2;
+ a2_allocateL2;
+ nS_issueRdBlkS;
+ d_deallocateTBE;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_S0, PrbInv, S0_C) {L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2;
+ a2_allocateL2;
+ nS_issueRdBlkS;
+ d_deallocateTBE;
+ pp_popProbeQueue;
+ }
+
+ transition(MO_S1, PrbInv, S1_C) {L2TagArrayWrite} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ i2_invL2;
+ a2_allocateL2;
+ nS_issueRdBlkS;
+ d_deallocateTBE;
+ pp_popProbeQueue;
+ }
+
+ transition({MO_S0, MO_S1}, {PrbShrData, PrbShrDataDemand}) {} {
+ pdt_sendProbeResponseDataFromTBE;
+ s_setSharedFlip;
+ pp_popProbeQueue;
+ }
+
+ transition({S_F0, Es_F0, E0_F, E1_Es}, {PrbInvData, PrbInvDataDemand, PrbInv}, IF_E0S) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ // invalidate everything you've got
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ // but make sure you have room for what you need from the fill
+ a0_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({S_F1, Es_F1, E1_F, E0_Es}, {PrbInvData, PrbInvDataDemand, PrbInv}, IF_E1S) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ // invalidate everything you've got
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ // but make sure you have room for what you need from the fill
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({S_F, Es_F}, {PrbInvData, PrbInvDataDemand, PrbInv}, IF_ES) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ // invalidate everything you've got
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ // but make sure you have room for what you need from the fill
+ a0_allocateL1D;
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition(Si_F0, {PrbInvData, PrbInvDataDemand, PrbInv}, F_S0) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ nS_issueRdBlkS;
+ pp_popProbeQueue;
+ }
+
+ transition(Si_F1, {PrbInvData, PrbInvDataDemand, PrbInv}, F_S1) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ nS_issueRdBlkS;
+ pp_popProbeQueue;
+ }
+
+ transition({Es_F0, E0_F, E1_Es}, {PrbShrData, PrbShrDataDemand}, S_F0) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition({Es_F1, E1_F, E0_Es}, {PrbShrData, PrbShrDataDemand}, S_F1) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition(Es_F, {PrbShrData, PrbShrDataDemand}, S_F) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition({S_F0, S_F1, S_F, Si_F0, Si_F1}, {PrbShrData, PrbShrDataDemand}) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition(S_M0, {PrbInvData, PrbInvDataDemand}, I_M0) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pim_sendProbeResponseInvMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a0_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition(O_M0, {PrbInvData, PrbInvDataDemand}, I_M0) {L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pdm_sendProbeResponseDataMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a0_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition({S_M0, O_M0}, {PrbInv}, I_M0) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pim_sendProbeResponseInvMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a0_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition(S_M1, {PrbInvData, PrbInvDataDemand}, I_M1) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pim_sendProbeResponseInvMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a1_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition(O_M1, {PrbInvData, PrbInvDataDemand}, I_M1) {L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pdm_sendProbeResponseDataMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a1_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition({S_M1, O_M1}, {PrbInv}, I_M1) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pim_sendProbeResponseInvMs;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ a1_allocateL1D;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition({S0, S0_C}, {PrbInvData, PrbInvDataDemand, PrbInv}) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition({S1, S1_C}, {PrbInvData, PrbInvDataDemand, PrbInv}) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ ii_invIcache;
+ i2_invL2;
+ ai_allocateL1I;
+ a2_allocateL2;
+ pp_popProbeQueue;
+ }
+
+ transition({S_M0, S_M1}, {PrbShrData, PrbShrDataDemand}) {} {
+ ph_sendProbeResponseHit;
+ pp_popProbeQueue;
+ }
+
+ transition({O_M0, O_M1}, {PrbShrData, PrbShrDataDemand}) {L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({S0, S1, S0_C, S1_C}, {PrbShrData, PrbShrDataDemand}) {} {
+ pb_sendProbeResponseBackprobe;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F0, M0_F, M1_Ms, O_F0}, {PrbInvData, PrbInvDataDemand}, IF_E0S) {L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pd_sendProbeResponseData;
+ ib_invBothClusters;
+ i2_invL2;
+ a0_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F1, M1_F, M0_Ms, O_F1}, {PrbInvData, PrbInvDataDemand}, IF_E1S) {L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pd_sendProbeResponseData;
+ ib_invBothClusters;
+ i2_invL2;
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F, O_F}, {PrbInvData, PrbInvDataDemand}, IF_ES) {L2DataArrayRead} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pd_sendProbeResponseData;
+ ib_invBothClusters;
+ i2_invL2;
+ a0_allocateL1D;
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F0, M0_F, M1_Ms, O_F0}, PrbInv, IF_E0S) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ i2_invL2;
+ a0_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F1, M1_F, M0_Ms, O_F1}, PrbInv, IF_E1S) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ i2_invL2;
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F, O_F}, PrbInv, IF_ES) {} {
+ forward_eviction_to_cpu0;
+ forward_eviction_to_cpu1;
+ pi_sendProbeResponseInv;
+ ib_invBothClusters;
+ i2_invL2;
+ a0_allocateL1D;
+ a1_allocateL1D;
+ a2_allocateL2;
+ n_issueRdBlk;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F0, M0_F, M1_Ms}, {PrbShrData, PrbShrDataDemand}, O_F0) {L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F1, M1_F, M0_Ms}, {PrbShrData, PrbShrDataDemand}, O_F1) {L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({Ms_F}, {PrbShrData, PrbShrDataDemand}, O_F) {L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ transition({O_F0, O_F1, O_F}, {PrbShrData, PrbShrDataDemand}) {L2DataArrayRead} {
+ pd_sendProbeResponseData;
+ pp_popProbeQueue;
+ }
+
+ // END TRANSITIONS
+}
+
+
diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm b/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm
new file mode 100644
index 000000000..52d87fb8b
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm
@@ -0,0 +1,2038 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:Directory, "AMD_Base-like protocol")
+: DirectoryMemory * directory;
+ CacheMemory * L3CacheMemory;
+ Cycles response_latency := 5;
+ Cycles response_latency_regionDir := 1;
+ Cycles l3_hit_latency := 30;
+ bool useL3OnWT := "False";
+ Cycles to_memory_controller_latency := 1;
+
+ // From the Cores
+ MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseFromCores, network="From", virtual_network="2", vnet_type="response";
+ MessageBuffer * unblockFromCores, network="From", virtual_network="4", vnet_type="unblock";
+
+ // To the Cores
+ MessageBuffer * probeToCore, network="To", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseToCore, network="To", virtual_network="2", vnet_type="response";
+
+ // From region buffer
+ MessageBuffer * reqFromRegBuf, network="From", virtual_network="7", vnet_type="request";
+
+ // To Region directory
+ MessageBuffer * reqToRegDir, network="To", virtual_network="5", vnet_type="request";
+ MessageBuffer * reqFromRegDir, network="From", virtual_network="5", vnet_type="request";
+ MessageBuffer * unblockToRegDir, network="To", virtual_network="4", vnet_type="unblock";
+
+ MessageBuffer * triggerQueue;
+ MessageBuffer * L3triggerQueue;
+ MessageBuffer * responseFromMemory;
+{
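+ // Per-block directory for the Region protocol: normal requests arrive
+ // already ordered by the region directory (reqFromRegDir), invalidations
+ // and downgrades come from the region buffer (reqFromRegBuf), and requests
+ // marked Private bypass the region machinery entirely.
+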
+ // STATES
+ state_declaration(State, desc="Directory states", default="Directory_State_U") {
+ U, AccessPermission:Backing_Store, desc="unblocked";
+ BR, AccessPermission:Backing_Store, desc="got CPU read request, blocked while sent to L3";
+ BW, AccessPermission:Backing_Store, desc="got CPU write request, blocked while sent to L3";
+ BL, AccessPermission:Busy, desc="got L3 WB request";
+ // BL is Busy because the only copy of the data may be in flight in the WB
+ // message: the L3 may have already sent it and moved on, possibly to the I
+ // state.
+ BI, AccessPermission:Backing_Store, desc="Blocked waiting for inv ack from core";
+ BS_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
+ BM_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
+ B_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
+ BP, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory";
+ BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
+ BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
+ B_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
+ BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ B, AccessPermission:Backing_Store, desc="sent response, Blocked til ack";
+
+ // These are needed when a private request was issued before an inv was received
+ // for writebacks
+ BS_Pm_BL, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ BM_Pm_BL, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ B_Pm_BL, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ BP_BL, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory";
+ // for reads
+ BS_Pm_B, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ BM_Pm_B, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ B_Pm_B, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ BP_B, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory";
+ }
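+
+ // Naming convention: *_PM states wait on probes and memory, *_Pm states
+ // already have memory and wait only on probes, and the *_BL / *_B variants
+ // additionally hold a writeback or read that arrived mid-transaction.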
+
+ // Events
+ enumeration(Event, desc="Directory events") {
+ // CPU requests
+ RdBlkS, desc="...";
+ RdBlkM, desc="...";
+ RdBlk, desc="...";
+ WriteThrough, desc="WriteThrough Message";
+ Atomic, desc="Atomic Message";
+
+ RdBlkSP, desc="...";
+ RdBlkMP, desc="...";
+ RdBlkP, desc="...";
+ VicDirtyP, desc="...";
+ VicCleanP, desc="...";
+ WriteThroughP, desc="WriteThrough Message";
+ AtomicP, desc="Atomic Message";
+
+ // writebacks
+ VicDirty, desc="...";
+ VicClean, desc="...";
+ CPUData, desc="WB data from CPU";
+ StaleWB, desc="WB response for a no longer valid request";
+
+ // probe responses
+ CPUPrbResp, desc="Probe Response Msg";
+ LastCPUPrbResp, desc="Last Probe Response Msg";
+
+ ProbeAcksComplete, desc="Probe Acks Complete";
+
+ L3Hit, desc="Hit in L3 return data to core";
+
+ // Memory Controller
+ MemData, desc="Fetched data from memory arrives";
+ WBAck, desc="Writeback Ack from memory arrives";
+
+ CoreUnblock, desc="Core received data, unblock";
+ UnblockWriteThrough, desc="unblock, self triggered";
+
+ StaleVicDirty, desc="Core invalidated before VicDirty processed";
+ StaleVicDirtyP, desc="Core invalidated before VicDirty processed (private, region dir bypassed)";
+
+ // For region protocol
+ CPUReq, desc="Generic CPU request";
+ Inv, desc="Region dir needs a block invalidated";
+ Downgrade, desc="Region dir needs a block downgraded";
+
+ // For private accesses (bypassed reg-dir)
+ CPUReadP, desc="Initial req from core, sent to L3";
+ CPUWriteP, desc="Initial req from core, sent to L3";
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ L3DataArrayRead, desc="Read the data array";
+ L3DataArrayWrite, desc="Write the data array";
+ L3TagArrayRead, desc="Read the tag array";
+ L3TagArrayWrite, desc="Write the tag array";
+ }
+
+ // TYPES
+
+ // DirectoryEntry
+ structure(Entry, desc="...", interface="AbstractEntry") {
+ State DirectoryState, desc="Directory state";
+ DataBlock DataBlk, desc="data for the block";
+ NetDest VicDirtyIgnore, desc="VicDirty coming from whom to ignore";
+ }
+
+ structure(CacheEntry, desc="...", interface="AbstractCacheEntry") {
+ DataBlock DataBlk, desc="data for the block";
+ MachineID LastSender, desc="Mach which this block came from";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block";
+ DataBlock DataBlkAux, desc="Auxiliary data for the block";
+ bool Dirty, desc="Is the data dirty?";
+ int NumPendingAcks, desc="num acks expected";
+ MachineID OriginalRequestor, desc="Original Requestor";
+ MachineID WTRequestor, desc="WT Requestor";
+ bool Cached, desc="data hit in Cache";
+ bool MemData, desc="Got MemData?",default="false";
+ bool wtData, desc="Got write through data?",default="false";
+ bool atomicData, desc="Got Atomic op?",default="false";
+ Cycles InitialRequestTime, desc="...";
+ Cycles ForwardRequestTime, desc="...";
+ Cycles ProbeRequestStartTime, desc="...";
+ bool DemandRequest, desc="for profiling";
+ MachineID LastSender, desc="Mach which this block came from";
+ bool L3Hit, default="false", desc="Was this an L3 hit?";
+ bool TriggeredAcksComplete, default="false", desc="True if already triggered acks complete";
+ WriteMask writeMask, desc="outstanding write through mask";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs";
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ void set_tbe(TBE a);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ Entry getDirectoryEntry(Addr addr), return_by_pointer="yes" {
+ Entry dir_entry := static_cast(Entry, "pointer", directory.lookup(addr));
+
+ if (is_valid(dir_entry)) {
+ //DPRINTF(RubySlicc, "Getting entry %s: %s\n", addr, dir_entry.DataBlk);
+ return dir_entry;
+ }
+
+ dir_entry := static_cast(Entry, "pointer",
+ directory.allocate(addr, new Entry));
+ return dir_entry;
+ }
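+
+ // Note: getDirectoryEntry allocates on a miss, so every address handled by
+ // the actions below is guaranteed to have a directory entry.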
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ TBE tbe := TBEs.lookup(addr);
+ if (is_valid(tbe) && tbe.MemData) {
+ DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe);
+ return tbe.DataBlk;
+ }
+ DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr));
+ return getDirectoryEntry(addr).DataBlk;
+ }
+
+ State getState(TBE tbe, CacheEntry entry, Addr addr) {
+ return getDirectoryEntry(addr).DirectoryState;
+ }
+
+ State getStateFromAddr(Addr addr) {
+ return getDirectoryEntry(addr).DirectoryState;
+ }
+
+ void setState(TBE tbe, CacheEntry entry, Addr addr, State state) {
+ getDirectoryEntry(addr).DirectoryState := state;
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ // For this Directory all permissions are tracked in the Directory itself;
+ // since a block cannot be in a TBE without also being in the Directory,
+ // state is kept in one place.
+ if(directory.isPresent(addr)) {
+ return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ void setAccessPermission(CacheEntry entry, Addr addr, State state) {
+ getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state));
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:L3DataArrayRead) {
+ L3CacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L3DataArrayWrite) {
+ L3CacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L3TagArrayRead) {
+ L3CacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L3TagArrayWrite) {
+ L3CacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:L3DataArrayRead) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L3DataArrayWrite) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L3TagArrayRead) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L3TagArrayWrite) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+ // ** OUT_PORTS **
+ out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore);
+ out_port(responseNetwork_out, ResponseMsg, responseToCore);
+
+ out_port(requestNetworkReg_out, CPURequestMsg, reqToRegDir);
+ out_port(regAckNetwork_out, UnblockMsg, unblockToRegDir);
+
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+ out_port(L3TriggerQueue_out, TriggerMsg, L3triggerQueue);
+
+ // ** IN_PORTS **
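+ // The rank arguments order servicing of these queues (higher rank first),
+ // so internal triggers and responses drain ahead of new requests and
+ // in-flight transactions can always make progress.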
+
+ // Trigger Queue
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=7) {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == TriggerType:AcksComplete) {
+ trigger(Event:ProbeAcksComplete, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == TriggerType:UnblockWriteThrough) {
+ trigger(Event:UnblockWriteThrough, in_msg.addr, entry, tbe);
+ } else {
+ error("Unknown trigger msg");
+ }
+ }
+ }
+ }
+
+ in_port(L3TriggerQueue_in, TriggerMsg, L3triggerQueue, rank=6) {
+ if (L3TriggerQueue_in.isReady(clockEdge())) {
+ peek(L3TriggerQueue_in, TriggerMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == TriggerType:L3Hit) {
+ trigger(Event:L3Hit, in_msg.addr, entry, tbe);
+ } else {
+ error("Unknown trigger msg");
+ }
+ }
+ }
+ }
+
+ // Unblock Network
+ in_port(unblockNetwork_in, UnblockMsg, unblockFromCores, rank=5) {
+ if (unblockNetwork_in.isReady(clockEdge())) {
+ peek(unblockNetwork_in, UnblockMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ trigger(Event:CoreUnblock, in_msg.addr, entry, tbe);
+ }
+ }
+ }
+
+ // Core response network
+ in_port(responseNetwork_in, ResponseMsg, responseFromCores, rank=4) {
+ if (responseNetwork_in.isReady(clockEdge())) {
+ peek(responseNetwork_in, ResponseMsg) {
+ DPRINTF(RubySlicc, "core responses %s\n", in_msg);
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+ if (is_valid(tbe) && tbe.NumPendingAcks == 1
+ && tbe.TriggeredAcksComplete == false) {
+ trigger(Event:LastCPUPrbResp, in_msg.addr, entry, tbe);
+ } else {
+ trigger(Event:CPUPrbResp, in_msg.addr, entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceResponseType:CPUData) {
+ trigger(Event:CPUData, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
+ trigger(Event:StaleWB, in_msg.addr, entry, tbe);
+ } else {
+ error("Unexpected response type");
+ }
+ }
+ }
+ }
+
+ // off-chip memory request/response is done
+ in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=3) {
+ if (memQueue_in.isReady(clockEdge())) {
+ peek(memQueue_in, MemoryMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
+ trigger(Event:MemData, in_msg.addr, entry, tbe);
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
+ trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them.
+ } else {
+ DPRINTF(RubySlicc, "%s\n", in_msg.Type);
+ error("Invalid message");
+ }
+ }
+ }
+ }
+
+ in_port(regBuf_in, CPURequestMsg, reqFromRegBuf, rank=2) {
+ if (regBuf_in.isReady(clockEdge())) {
+ peek(regBuf_in, CPURequestMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == CoherenceRequestType:ForceInv) {
+ trigger(Event:Inv, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:ForceDowngrade) {
+ trigger(Event:Downgrade, in_msg.addr, entry, tbe);
+ } else {
+ error("Bad request from region buffer");
+ }
+ }
+ }
+ }
+
+ in_port(regDir_in, CPURequestMsg, reqFromRegDir, rank=1) {
+ if (regDir_in.isReady(clockEdge())) {
+ peek(regDir_in, CPURequestMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == CoherenceRequestType:RdBlk) {
+ trigger(Event:RdBlk, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+ trigger(Event:RdBlkS, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+ trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+ trigger(Event:Atomic, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+ if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+ DPRINTF(RubySlicc, "Dropping VicDirty for address %s\n", in_msg.addr);
+ trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+ } else {
+ trigger(Event:VicDirty, in_msg.addr, entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+ if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+ DPRINTF(RubySlicc, "Dropping VicClean for address %s\n", in_msg.addr);
+ trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+ } else {
+ trigger(Event:VicClean, in_msg.addr, entry, tbe);
+ }
+ } else {
+ error("Bad message type fwded from Region Dir");
+ }
+ }
+ }
+ }
+
+ in_port(requestNetwork_in, CPURequestMsg, requestFromCores, rank=0) {
+ if (requestNetwork_in.isReady(clockEdge())) {
+ peek(requestNetwork_in, CPURequestMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Private) {
+ // Bypass the region dir
+ if (in_msg.Type == CoherenceRequestType:RdBlk) {
+ trigger(Event:RdBlkP, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+ trigger(Event:RdBlkSP, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+ trigger(Event:RdBlkMP, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+ trigger(Event:AtomicP, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ trigger(Event:WriteThroughP, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+ if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+ DPRINTF(RubySlicc, "Dropping VicDirtyP for address %s\n", in_msg.addr);
+ trigger(Event:StaleVicDirtyP, in_msg.addr, entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "Got VicDirty from %s on %s\n", in_msg.Requestor, in_msg.addr);
+ trigger(Event:VicDirtyP, in_msg.addr, entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+ if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+ DPRINTF(RubySlicc, "Dropping VicCleanP for address %s\n", in_msg.addr);
+ trigger(Event:StaleVicDirtyP, in_msg.addr, entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr);
+ trigger(Event:VicCleanP, in_msg.addr, entry, tbe);
+ }
+ } else {
+ error("Bad message type for private access");
+ }
+ } else {
+ trigger(Event:CPUReq, in_msg.addr, entry, tbe);
+ }
+ }
+ }
+ }
+
+ // Actions
+ action(s_sendResponseS, "s", desc="send Shared response") {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Shared;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.DemandRequest := tbe.DemandRequest;
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(es_sendResponseES, "es", desc="send Exclusive or Shared response") {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Cached) {
+ out_msg.State := CoherenceState:Shared;
+ } else {
+ out_msg.State := CoherenceState:Exclusive;
+ }
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.DemandRequest := tbe.DemandRequest;
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(m_sendResponseM, "m", desc="send Modified response") {
+ if (tbe.wtData) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:UnblockWriteThrough;
+ }
+ } else {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.State := CoherenceState:Modified;
+ out_msg.CtoD := false;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.DemandRequest := tbe.DemandRequest;
+ out_msg.L3Hit := tbe.L3Hit;
+ if (tbe.atomicData) {
+ out_msg.WTRequestor := tbe.WTRequestor;
+ }
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ if (tbe.atomicData) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:UnblockWriteThrough;
+ }
+ }
+ }
+ }
+
+ action(sb_sendResponseSBypass, "sb", desc="send Shared response") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Shared;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.DemandRequest := false;
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(esb_sendResponseESBypass, "esb", desc="send Exclusive or Shared response") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Cached || in_msg.ForceShared) {
+ out_msg.State := CoherenceState:Shared;
+ } else {
+ out_msg.State := CoherenceState:Exclusive;
+ }
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.DemandRequest := false;
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(mbwt_sendResponseWriteThroughBypass, "mbwt", desc="send write through response") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysWBAck;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+ out_msg.DemandRequest := false;
+ }
+ } else {
+ assert(in_msg.Type == CoherenceRequestType:Atomic);
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := getDirectoryEntry(address).DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := in_msg.Dirty;
+ out_msg.State := CoherenceState:Modified;
+ out_msg.CtoD := false;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.DemandRequest := false;
+ out_msg.L3Hit := tbe.L3Hit;
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:UnblockWriteThrough;
+ }
+ }
+ }
+
+ action(mb_sendResponseMBypass, "mb", desc="send Modified response") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.State := CoherenceState:Modified;
+ out_msg.CtoD := false;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.DemandRequest := false;
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(c_sendResponseCtoD, "c", desc="send CtoD Ack") {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Modified;
+ out_msg.CtoD := true;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ out_msg.DemandRequest := tbe.DemandRequest;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(cp_sendResponseCtoDP, "cp", desc="send CtoD Ack") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Modified;
+ out_msg.CtoD := true;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+ out_msg.DemandRequest := false;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+ peek(regDir_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysWBAck;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
+ out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+ out_msg.DemandRequest := false;
+ }
+ }
+ }
+
+ action(wp_sendResponseWBAckP, "wp", desc="send WB Ack") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysWBAck;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+ out_msg.DemandRequest := false;
+ }
+ }
+ }
+
+ action(wc_sendResponseWBAck, "wc", desc="send WB Ack for cancel") {
+ peek(responseNetwork_in, ResponseMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysWBAck;
+ out_msg.Destination.add(in_msg.Sender);
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(ra_ackRegionDir, "ra", desc="Ack region dir") {
+ peek(regDir_in, CPURequestMsg) {
+ if (in_msg.NoAckNeeded == false) {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency_regionDir) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DirReadyAck;
+ out_msg.Destination.add(map_Address_to_RegionDir(address));
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+ }
+
+ action(l_queueMemRdReq, "lr", desc="Read data from memory") {
+ peek(regDir_in, CPURequestMsg) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ tbe.DataBlk := entry.DataBlk;
+ tbe.LastSender := entry.LastSender;
+ tbe.L3Hit := true;
+ tbe.MemData := true;
+ DPRINTF(RubySlicc, "L3 data is %s\n", entry.DataBlk);
+ L3CacheMemory.deallocate(address);
+ } else {
+ queueMemoryRead(machineID, address, to_memory_controller_latency);
+ }
+ }
+ }
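+
+ // On an L3 hit the block is captured in the TBE and an L3Hit trigger is
+ // scheduled after l3_hit_latency instead of a memory read; the L3 entry is
+ // deallocated, leaving the TBE as the block's only holder until the
+ // response is sent.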
+
+ action(lrp_queueMemRdReqP, "lrp", desc="Read data from memory") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ tbe.DataBlk := entry.DataBlk;
+ tbe.LastSender := entry.LastSender;
+ tbe.L3Hit := true;
+ tbe.MemData := true;
+ DPRINTF(RubySlicc, "L3 data is %s\n", entry.DataBlk);
+ L3CacheMemory.deallocate(address);
+ } else {
+ queueMemoryRead(machineID, address, to_memory_controller_latency);
+ }
+ }
+ }
+
+ action(dcr_probeInvCoreData, "dcr", desc="probe inv cores, return data") {
+ peek(regBuf_in, CPURequestMsg) {
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination := in_msg.Sharers;
+ tbe.NumPendingAcks := tbe.NumPendingAcks + in_msg.Sharers.count();
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ APPEND_TRANSITION_COMMENT(" dcr: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(ddr_probeDownCoreData, "ddr", desc="probe inv cores, return data") {
+ peek(regBuf_in, CPURequestMsg) {
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbDowngrade;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination := in_msg.Sharers;
+ tbe.NumPendingAcks := tbe.NumPendingAcks + in_msg.Sharers.count();
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ APPEND_TRANSITION_COMMENT(" dcr: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") {
+ peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbDowngrade;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket
+ tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:CorePair) - 1;
+ out_msg.Destination.broadcast(MachineType:TCP);
+ tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:TCP);
+ out_msg.Destination.broadcast(MachineType:SQC);
+ tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:SQC);
+ out_msg.Destination.remove(in_msg.Requestor);
+ DPRINTF(RubySlicc, "%s\n", (out_msg));
+ APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
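+
+ // Ack accounting example (hypothetical counts: 2 CorePairs, 4 TCPs, 2
+ // SQCs): NumPendingAcks grows by (2-1) + 4 + 2 = 7. The -1 reflects the
+ // requesting CorePair, which is removed from the destination set.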
+
+ action(ic_probeInvCore, "ic", desc="probe invalidate core, no return data needed") {
+ peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := false;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket
+ tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:CorePair) - 1;
+ out_msg.Destination.broadcast(MachineType:TCP);
+ tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:TCP);
+ out_msg.Destination.broadcast(MachineType:SQC);
+ tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:SQC);
+ out_msg.Destination.remove(in_msg.Requestor);
+ APPEND_TRANSITION_COMMENT(" ic: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(d_writeDataToMemory, "d", desc="Write data to memory") {
+ peek(responseNetwork_in, ResponseMsg) {
+ getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
+ DPRINTF(RubySlicc, "Writing Data: %s to address %s\n", in_msg.DataBlk,
+ in_msg.addr);
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ peek(regDir_in, CPURequestMsg) {
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ tbe.writeMask.clear();
+ tbe.writeMask.orMask(in_msg.writeMask);
+ tbe.wtData := true;
+ tbe.WTRequestor := in_msg.WTRequestor;
+ tbe.LastSender := in_msg.Requestor;
+ }
+ if (in_msg.Type == CoherenceRequestType:Atomic) {
+ tbe.writeMask.clear();
+ tbe.writeMask.orMask(in_msg.writeMask);
+ tbe.atomicData := true;
+ tbe.WTRequestor := in_msg.WTRequestor;
+ tbe.LastSender := in_msg.Requestor;
+ }
+ tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
+ tbe.Dirty := false;
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ tbe.DataBlk.copyPartial(in_msg.DataBlk,tbe.writeMask);
+ tbe.Dirty := false;
+ }
+ tbe.OriginalRequestor := in_msg.Requestor;
+ tbe.NumPendingAcks := 0;
+ tbe.Cached := in_msg.ForceShared;
+ tbe.InitialRequestTime := in_msg.InitialRequestTime;
+ tbe.ForwardRequestTime := curCycle();
+ tbe.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+ tbe.DemandRequest := in_msg.DemandRequest;
+ }
+ }
+
+ action(tp_allocateTBEP, "tp", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ peek(requestNetwork_in, CPURequestMsg) {
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ tbe.writeMask.clear();
+ tbe.writeMask.orMask(in_msg.writeMask);
+ tbe.wtData := true;
+ tbe.WTRequestor := in_msg.WTRequestor;
+ tbe.LastSender := in_msg.Requestor;
+ }
+ if (in_msg.Type == CoherenceRequestType:Atomic) {
+ tbe.writeMask.clear();
+ tbe.writeMask.orMask(in_msg.writeMask);
+ tbe.atomicData := true;
+ tbe.WTRequestor := in_msg.WTRequestor;
+ tbe.LastSender := in_msg.Requestor;
+ }
+ tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
+ tbe.Dirty := false;
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ tbe.DataBlk.copyPartial(in_msg.DataBlk,tbe.writeMask);
+ tbe.Dirty := false;
+ }
+ tbe.OriginalRequestor := in_msg.Requestor;
+ tbe.NumPendingAcks := 0;
+ tbe.Cached := in_msg.ForceShared;
+ tbe.InitialRequestTime := in_msg.InitialRequestTime;
+ tbe.ForwardRequestTime := curCycle();
+ tbe.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+ tbe.DemandRequest := false;
+ }
+ }
+
+ action(sa_setAcks, "sa", desc="setAcks") {
+ peek(regDir_in, CPURequestMsg) {
+ tbe.NumPendingAcks := in_msg.Acks;
+ APPEND_TRANSITION_COMMENT(" waiting for acks ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ }
+ }
+
+ action(tr_allocateTBE, "tr", desc="allocate TBE Entry for Region inv") {
+ check_allocate(TBEs);
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.NumPendingAcks := 0;
+ }
+
+ action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(wdp_writeBackDataPrivate, "wdp", desc="Write back data if needed") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ tbe.DataBlkAux := getDirectoryEntry(address).DataBlk;
+ tbe.DataBlkAux.copyPartial(in_msg.DataBlk,in_msg.writeMask);
+ getDirectoryEntry(address).DataBlk := tbe.DataBlkAux;
+ } else {
+ assert(in_msg.Type == CoherenceRequestType:Atomic);
+ tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask);
+ getDirectoryEntry(address).DataBlk := tbe.DataBlkAux;
+ }
+ }
+ }
+
+ action(wd_writeBackData, "wd", desc="Write back data if needed") {
+ if (tbe.wtData) {
+ DataBlock tmp := getDirectoryEntry(address).DataBlk;
+ tmp.copyPartial(tbe.DataBlk,tbe.writeMask);
+ tbe.DataBlk := tmp;
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ } else if (tbe.atomicData) {
+ tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask);
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ } else if (tbe.Dirty == true) {
+ APPEND_TRANSITION_COMMENT(" Wrote data back ");
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ }
+ }
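+
+ // writeMask selects the live bytes: copyPartial merges only the masked
+ // bytes of the WT data into the block, while atomicPartial applies the
+ // atomic read-modify-write to the masked bytes.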
+
+ action(wdi_writeBackDataInv, "wdi", desc="Write back inv data if needed") {
+ // Unlike wd_writeBackData above, only dirty data is written back here.
+ if (tbe.Dirty == true) {
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ APPEND_TRANSITION_COMMENT("Writing dirty data to dir");
+ DPRINTF(RubySlicc, "Data %s: %s\n", address, tbe.DataBlk);
+ } else {
+ APPEND_TRANSITION_COMMENT("NOT!!! Writing dirty data to dir");
+ }
+ }
+
+ action(wdt_writeBackDataInvNoTBE, "wdt", desc="Write back inv data if needed no TBE") {
+ // Same as wdi_writeBackDataInv, but the dirty data comes from the message since there is no TBE.
+ peek(responseNetwork_in, ResponseMsg) {
+ if (in_msg.Dirty == true) {
+ getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
+ APPEND_TRANSITION_COMMENT("Writing dirty data to dir");
+ DPRINTF(RubySlicc, "Data %s: %s\n", address, in_msg.DataBlk);
+ } else {
+ APPEND_TRANSITION_COMMENT("NOT!!! Writing dirty data to dir");
+ }
+ }
+ }
+
+ action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") {
+ peek(memQueue_in, MemoryMsg) {
+ if (tbe.Dirty == false) {
+ tbe.DataBlk := getDirectoryEntry(address).DataBlk;
+ }
+ tbe.MemData := true;
+ }
+ }
+
+ action(ml_writeL3DataToTBE, "ml", desc="write L3 data to TBE") {
+ assert(tbe.Dirty == false);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ tbe.DataBlk := entry.DataBlk;
+ tbe.LastSender := entry.LastSender;
+ tbe.L3Hit := true;
+ tbe.MemData := true;
+ }
+
+ action(y_writeProbeDataToTBE, "y", desc="write Probe Data to TBE") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (in_msg.Dirty) {
+ DPRINTF(RubySlicc, "Got dirty data for %s from %s\n", address, in_msg.Sender);
+ DPRINTF(RubySlicc, "Data is %s\n", in_msg.DataBlk);
+ if (tbe.wtData) {
+ DataBlock tmp := in_msg.DataBlk;
+ tmp.copyPartial(tbe.DataBlk,tbe.writeMask);
+ tbe.DataBlk := tmp;
+ } else if (tbe.Dirty) {
+ if(tbe.atomicData == false && tbe.wtData == false) {
+ DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
+ assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data
+ }
+ } else {
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.Dirty := in_msg.Dirty;
+ tbe.LastSender := in_msg.Sender;
+ }
+ }
+ if (in_msg.Hit) {
+ tbe.Cached := true;
+ }
+ }
+ }
+
+ action(yc_writeCPUDataToTBE, "yc", desc="write CPU Data to TBE") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (in_msg.Dirty) {
+ DPRINTF(RubySlicc, "Got dirty data for %s from %s\n", address, in_msg.Sender);
+ DPRINTF(RubySlicc, "Data is %s\n", in_msg.DataBlk);
+ if (tbe.Dirty) {
+ DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
+ assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data
+ }
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.Dirty := false;
+ tbe.LastSender := in_msg.Sender;
+ }
+ }
+ }
+
+ action(x_decrementAcks, "x", desc="decrement Acks pending") {
+ if (tbe.NumPendingAcks > 0) {
+ tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+ } else {
+ APPEND_TRANSITION_COMMENT(" Double ack! ");
+ }
+ assert(tbe.NumPendingAcks >= 0);
+ APPEND_TRANSITION_COMMENT(" Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ }
+
+ action(o_checkForCompletion, "o", desc="check for ack completion") {
+ if (tbe.NumPendingAcks == 0 && tbe.TriggeredAcksComplete == false) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ tbe.TriggeredAcksComplete := true;
+ }
+ APPEND_TRANSITION_COMMENT(" Check: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ }
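+
+ // When the final ack arrives, a self-directed AcksComplete trigger is
+ // queued; it returns through triggerQueue_in as Event:ProbeAcksComplete.
+ // TriggeredAcksComplete prevents the trigger from being enqueued twice.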
+
+ action(ont_checkForCompletionNoTrigger, "ont", desc="check for ack completion, no trigger") {
+ if (tbe.NumPendingAcks == 0 && tbe.TriggeredAcksComplete == false) {
+ tbe.TriggeredAcksComplete := true;
+ }
+ APPEND_TRANSITION_COMMENT(" Check: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ }
+
+ action(rvp_removeVicDirtyIgnore, "rvp", desc="Remove ignored core") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor);
+ }
+ }
+
+ action(rv_removeVicDirtyIgnore, "rv", desc="Remove ignored core") {
+ peek(regDir_in, CPURequestMsg) {
+ getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor);
+ }
+ }
+
+ action(r_sendRequestToRegionDir, "r", desc="send request to Region Directory") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(requestNetworkReg_out, CPURequestMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.add(map_Address_to_RegionDir(address));
+ out_msg.Shared := in_msg.Shared;
+ out_msg.MessageSize := in_msg.MessageSize;
+ DPRINTF(RubySlicc, "out dest: %s\n", map_Address_to_RegionDir(address));
+ }
+ }
+ }
+
+ action(ai_ackInvalidate, "ai", desc="Ack to let the reg-dir know that the inv is ordered") {
+ peek(regBuf_in, CPURequestMsg) {
+ enqueue(regAckNetwork_out, UnblockMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ DPRINTF(RubySlicc, "ai out_msg: %s\n", out_msg);
+ }
+ }
+ }
+
+ action(aic_ackInvalidate, "aic", desc="Ack to let the reg-dir know that the inv is ordered") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (in_msg.NoAckNeeded == false) {
+ enqueue(regAckNetwork_out, UnblockMsg, 1) {
+ out_msg.addr := address;
+ if (machineIDToMachineType(in_msg.Sender) == MachineType:CorePair) {
+ out_msg.Destination.add(createMachineID(MachineType:RegionBuffer, intToID(0)));
+ } else {
+ out_msg.Destination.add(createMachineID(MachineType:RegionBuffer, intToID(1)));
+ }
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ DPRINTF(RubySlicc, "ai out_msg: %s\n", out_msg);
+ out_msg.wasValid := in_msg.isValid;
+ }
+ }
+ }
+ }
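+ // Note the hard-coded destinations above: responses from a CorePair ack
+ // RegionBuffer 0 (presumably the CPU-side buffer) and everything else
+ // acks RegionBuffer 1 (the GPU-side buffer); configurations with more
+ // region buffers would need a real mapping here.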
+
+ action(al_allocateL3Block, "al", desc="allocate the L3 block on WB") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+ entry.DataBlk := in_msg.DataBlk;
+ entry.LastSender := in_msg.Sender;
+ } else {
+ if (L3CacheMemory.cacheAvail(address) == false) {
+ Addr victim := L3CacheMemory.cacheProbe(address);
+ CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+ L3CacheMemory.lookup(victim));
+ queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+ victim_entry.DataBlk);
+ L3CacheMemory.deallocate(victim);
+ }
+ assert(L3CacheMemory.cacheAvail(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+ entry.DataBlk := in_msg.DataBlk;
+ entry.LastSender := in_msg.Sender;
+ }
+ }
+ }
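+ // The miss path above is the standard allocate-or-evict pattern, repeated
+ // in the alwt/ali variants below: when no way is free, cacheProbe picks a
+ // victim, the victim's data is queued to memory, the victim is deallocated,
+ // and only then is the new block allocated.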
+
+ action(alwt_allocateL3BlockOnWT, "alwt", desc="allocate the L3 block on WT") {
+ if ((tbe.wtData || tbe.atomicData) && useL3OnWT) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+ entry.DataBlk := tbe.DataBlk;
+ entry.LastSender := tbe.LastSender;
+ } else {
+ if (L3CacheMemory.cacheAvail(address) == false) {
+ Addr victim := L3CacheMemory.cacheProbe(address);
+ CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+ L3CacheMemory.lookup(victim));
+ queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+ victim_entry.DataBlk);
+ L3CacheMemory.deallocate(victim);
+ }
+ assert(L3CacheMemory.cacheAvail(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+ entry.DataBlk := tbe.DataBlk;
+ entry.LastSender := tbe.LastSender;
+ }
+ }
+ }
+
+ action(ali_allocateL3Block, "ali", desc="allocate the L3 block on ForceInv") {
+ if (tbe.Dirty == true) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+ entry.DataBlk := tbe.DataBlk;
+ entry.LastSender := tbe.LastSender;
+ } else {
+ if (L3CacheMemory.cacheAvail(address) == false) {
+ Addr victim := L3CacheMemory.cacheProbe(address);
+ CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+ L3CacheMemory.lookup(victim));
+ queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+ victim_entry.DataBlk);
+ L3CacheMemory.deallocate(victim);
+ }
+ assert(L3CacheMemory.cacheAvail(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+ entry.DataBlk := tbe.DataBlk;
+ entry.LastSender := tbe.LastSender;
+ }
+ }
+ }
+
+ action(ali_allocateL3BlockNoTBE, "alt", desc="allocate the L3 block on ForceInv no TBE") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (in_msg.Dirty) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ APPEND_TRANSITION_COMMENT(" ali wrote data to L3 (hit) ");
+ entry.DataBlk := in_msg.DataBlk;
+ entry.LastSender := in_msg.Sender;
+ } else {
+ if (L3CacheMemory.cacheAvail(address) == false) {
+ Addr victim := L3CacheMemory.cacheProbe(address);
+ CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+ L3CacheMemory.lookup(victim));
+ queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+ victim_entry.DataBlk);
+ L3CacheMemory.deallocate(victim);
+ }
+ assert(L3CacheMemory.cacheAvail(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+ APPEND_TRANSITION_COMMENT(" ali wrote data to L3 ");
+ entry.DataBlk := in_msg.DataBlk;
+ entry.LastSender := in_msg.Sender;
+ }
+ }
+ }
+ }
+
+ action(dl_deallocateL3, "dl", desc="deallocate the L3 block") {
+ L3CacheMemory.deallocate(address);
+ }
+
+ action(p_popRequestQueue, "p", desc="pop request queue") {
+ requestNetwork_in.dequeue(clockEdge());
+ }
+
+ action(prd_popRegionQueue, "prd", desc="pop request queue") {
+ regDir_in.dequeue(clockEdge());
+ }
+
+ action(prb_popRegionBufQueue, "prb", desc="pop request queue") {
+ regBuf_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="pop response queue") {
+ responseNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pm_popMemQueue, "pm", desc="pop mem queue") {
+ memQueue_in.dequeue(clockEdge());
+ }
+
+ action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
+ triggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(ptl_popTriggerQueue, "ptl", desc="pop L3 trigger queue") {
+ L3TriggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(pu_popUnblockQueue, "pu", desc="pop unblock queue") {
+ unblockNetwork_in.dequeue(clockEdge());
+ }
+
+ action(yy_recycleResponseQueue, "yy", desc="recycle response queue") {
+ responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(ww_stallAndWaitRegRequestQueue, "ww", desc="recycle region dir request queue") {
+ stall_and_wait(regDir_in, address);
+ }
+
+ action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") {
+ stall_and_wait(requestNetwork_in, address);
+ }
+
+ action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") {
+ wakeUpBuffers(address);
+ }
+
+ action(wa_wakeUpAllDependents, "waa", desc="Wake up any requests waiting for this region") {
+ wakeUpAllBuffers();
+ }
+
+ action(z_stall, "z", desc="...") {
+ }
+
+ // TRANSITIONS
+
+ // transitions from U
+
+ transition({BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {Inv, Downgrade}) {
+ ww_stallAndWaitRegRequestQueue;
+ }
+
+ transition(U, Inv, BI){L3TagArrayRead} {
+ tr_allocateTBE;
+ dcr_probeInvCoreData; // only need to invalidate sharers
+ ai_ackInvalidate;
+ prb_popRegionBufQueue;
+ }
+
+ transition(U, Downgrade, BI){L3TagArrayRead} {
+ tr_allocateTBE;
+ ddr_probeDownCoreData; // only need to invalidate sharers
+ ai_ackInvalidate;
+ prb_popRegionBufQueue;
+ }
+
+ // The next two transitions are needed in the event that an invalidation
+ // is waiting for its ack from the core, but a new request makes it through
+ // the region directory before the acks arrive. This wouldn't be needed if
+ // we waited to ack the region dir until the directory got all the acks.
+ transition({BR, BW, BI, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, WriteThrough, Atomic}) {
+ ww_stallAndWaitRegRequestQueue;
+ }
+
+ transition({BR, BW, BI, BL, BS_M, BM_M, B_M, BS_PM, BM_PM, B_PM, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {RdBlkSP, RdBlkMP, RdBlkP}) {
+ st_stallAndWaitRequest;
+ }
+
+ transition({BR, BW, BI, BL, BS_M, BM_M, B_M, BS_PM, BM_PM, B_PM, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {WriteThroughP,AtomicP}) {
+ st_stallAndWaitRequest;
+ }
+
+ transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead} {
+ t_allocateTBE;
+ l_queueMemRdReq;
+ sa_setAcks;
+ o_checkForCompletion;
+ ra_ackRegionDir;
+ prd_popRegionQueue;
+ }
+
+ transition(U, WriteThrough, BM_PM){L3TagArrayRead} {
+ t_allocateTBE;
+ w_sendResponseWBAck;
+ l_queueMemRdReq;
+ sa_setAcks;
+ o_checkForCompletion;
+ ra_ackRegionDir;
+ prd_popRegionQueue;
+ }
+
+ transition(U, {RdBlkM,Atomic}, BM_PM){L3TagArrayRead} {
+ t_allocateTBE;
+ l_queueMemRdReq;
+ sa_setAcks;
+ o_checkForCompletion;
+ ra_ackRegionDir;
+ prd_popRegionQueue;
+ }
+
+ transition(U, RdBlk, B_PM){L3TagArrayRead} {
+ t_allocateTBE;
+ l_queueMemRdReq;
+ sa_setAcks;
+ o_checkForCompletion;
+ ra_ackRegionDir;
+ prd_popRegionQueue;
+ }
+
+ transition(U, {RdBlkSP}, BS_M) {L3TagArrayRead} {
+ tp_allocateTBEP;
+ lrp_queueMemRdReqP;
+ p_popRequestQueue;
+ }
+
+ transition(U, WriteThroughP, BM_M) {L3TagArrayRead} {
+ tp_allocateTBEP;
+ wp_sendResponseWBAckP;
+ lrp_queueMemRdReqP;
+ p_popRequestQueue;
+ }
+
+ transition(U, {RdBlkMP,AtomicP}, BM_M) {L3TagArrayRead} {
+ tp_allocateTBEP;
+ lrp_queueMemRdReqP;
+ p_popRequestQueue;
+ }
+
+ transition(U, RdBlkP, B_M) {L3TagArrayRead} {
+ tp_allocateTBEP;
+ lrp_queueMemRdReqP;
+ p_popRequestQueue;
+ }
+
+ transition(U, VicDirtyP, BL) {L3TagArrayRead} {
+ tp_allocateTBEP;
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition(U, VicCleanP, BL) {L3TagArrayRead} {
+ tp_allocateTBEP;
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition(BM_Pm, RdBlkSP, BM_Pm_B) {L3DataArrayWrite} {
+ sb_sendResponseSBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BS_Pm, RdBlkSP, BS_Pm_B) {L3DataArrayWrite} {
+ sb_sendResponseSBypass;
+ p_popRequestQueue;
+ }
+
+ transition(B_Pm, RdBlkSP, B_Pm_B) {L3DataArrayWrite} {
+ sb_sendResponseSBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BP, RdBlkSP, BP_B) {L3DataArrayWrite} {
+ sb_sendResponseSBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BM_Pm, RdBlkMP, BM_Pm_B) {L3DataArrayWrite} {
+ mb_sendResponseMBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BS_Pm, RdBlkMP, BS_Pm_B) {L3DataArrayWrite} {
+ mb_sendResponseMBypass;
+ p_popRequestQueue;
+ }
+
+ transition(B_Pm, RdBlkMP, B_Pm_B) {L3DataArrayWrite} {
+ mb_sendResponseMBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BP, RdBlkMP, BP_B) {L3DataArrayWrite} {
+ mb_sendResponseMBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BM_Pm, {WriteThroughP,AtomicP}, BM_Pm_B) {L3DataArrayWrite} {
+ wdp_writeBackDataPrivate;
+ mbwt_sendResponseWriteThroughBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BS_Pm, {WriteThroughP,AtomicP}, BS_Pm_B) {L3DataArrayWrite} {
+ wdp_writeBackDataPrivate;
+ mbwt_sendResponseWriteThroughBypass;
+ p_popRequestQueue;
+ }
+
+ transition(B_Pm, {WriteThroughP,AtomicP}, B_Pm_B) {L3DataArrayWrite} {
+ wdp_writeBackDataPrivate;
+ mbwt_sendResponseWriteThroughBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BP, {WriteThroughP,AtomicP}, BP_B) {L3DataArrayWrite} {
+ wdp_writeBackDataPrivate;
+ mbwt_sendResponseWriteThroughBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BM_Pm, RdBlkP, BM_Pm_B) {L3DataArrayWrite} {
+ esb_sendResponseESBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BS_Pm, RdBlkP, BS_Pm_B) {L3DataArrayWrite} {
+ esb_sendResponseESBypass;
+ p_popRequestQueue;
+ }
+
+ transition(B_Pm, RdBlkP, B_Pm_B) {L3DataArrayWrite}{
+ esb_sendResponseESBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BP, RdBlkP, BP_B) {L3DataArrayWrite}{
+ esb_sendResponseESBypass;
+ p_popRequestQueue;
+ }
+
+ transition(BM_Pm_B, CoreUnblock, BM_Pm) {
+ wa_wakeUpDependents;
+ pu_popUnblockQueue;
+ }
+
+ transition(BS_Pm_B, CoreUnblock, BS_Pm) {
+ wa_wakeUpDependents;
+ pu_popUnblockQueue;
+ }
+
+ transition(B_Pm_B, CoreUnblock, B_Pm) {
+ wa_wakeUpDependents;
+ pu_popUnblockQueue;
+ }
+
+ transition(BP_B, CoreUnblock, BP) {
+ wa_wakeUpDependents;
+ pu_popUnblockQueue;
+ }
+
+ transition(BM_Pm_B, UnblockWriteThrough, BM_Pm) {
+ wa_wakeUpDependents;
+ pt_popTriggerQueue;
+ }
+
+ transition(BS_Pm_B, UnblockWriteThrough, BS_Pm) {
+ wa_wakeUpDependents;
+ pt_popTriggerQueue;
+ }
+
+ transition(B_Pm_B, UnblockWriteThrough, B_Pm) {
+ wa_wakeUpDependents;
+ pt_popTriggerQueue;
+ }
+
+ transition(BP_B, UnblockWriteThrough, BP) {
+ wa_wakeUpDependents;
+ pt_popTriggerQueue;
+ }
+
+ transition(BM_Pm, VicDirtyP, BM_Pm_BL) {
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition(BS_Pm, VicDirtyP, BS_Pm_BL) {
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition(B_Pm, VicDirtyP, B_Pm_BL) {
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition(BP, VicDirtyP, BP_BL) {
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition(BM_Pm, VicCleanP, BM_Pm_BL) {
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition(BS_Pm, VicCleanP, BS_Pm_BL) {
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition(B_Pm, VicCleanP, B_Pm_BL) {
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition(BP, VicCleanP, BP_BL) {
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition(BM_Pm_BL, CPUData, BM_Pm) {
+ yc_writeCPUDataToTBE;
+ d_writeDataToMemory;
+ wa_wakeUpDependents;
+ pr_popResponseQueue;
+ }
+
+ transition(BS_Pm_BL, CPUData, BS_Pm) {
+ yc_writeCPUDataToTBE;
+ d_writeDataToMemory;
+ wa_wakeUpDependents;
+ pr_popResponseQueue;
+ }
+
+ transition(B_Pm_BL, CPUData, B_Pm) {
+ yc_writeCPUDataToTBE;
+ d_writeDataToMemory;
+ wa_wakeUpDependents;
+ pr_popResponseQueue;
+ }
+
+ transition(BP_BL, CPUData, BP) {
+ yc_writeCPUDataToTBE;
+ d_writeDataToMemory;
+ wa_wakeUpDependents;
+ pr_popResponseQueue;
+ }
+
+ transition({BR, BW, BL}, {VicDirtyP, VicCleanP}) {
+ st_stallAndWaitRequest;
+ }
+
+ transition({BR, BW, BL}, {VicDirty, VicClean}) {
+ ww_stallAndWaitRegRequestQueue;
+ }
+
+ transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} {
+ dt_deallocateTBE;
+ d_writeDataToMemory;
+ al_allocateL3Block;
+ wa_wakeUpDependents;
+ pr_popResponseQueue;
+ }
+
+ transition(BL, StaleWB, U) {L3TagArrayWrite} {
+ dt_deallocateTBE;
+ wa_wakeUpAllDependents;
+ pr_popResponseQueue;
+ }
+
+ transition({BI, B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {VicDirty, VicClean}) {
+ ww_stallAndWaitRegRequestQueue;
+ }
+
+ transition({BI, B, BS_M, BM_M, B_M, BS_PM, BM_PM, B_PM, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {VicDirtyP, VicCleanP}) {
+ st_stallAndWaitRequest;
+ }
+
+ transition({U, BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, WBAck) {
+ pm_popMemQueue;
+ }
+
+ transition({U, BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, StaleVicDirtyP) {
+ rvp_removeVicDirtyIgnore;
+ wp_sendResponseWBAckP;
+ p_popRequestQueue;
+ }
+
+ transition({U, BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, StaleVicDirty) {
+ rv_removeVicDirtyIgnore;
+ w_sendResponseWBAck;
+ prd_popRegionQueue;
+ }
+
+ transition(U, VicDirty, BL) {L3TagArrayRead} {
+ t_allocateTBE;
+ ra_ackRegionDir;
+ w_sendResponseWBAck;
+ prd_popRegionQueue;
+ }
+
+ transition(U, VicClean, BL) {L3TagArrayRead} {
+ t_allocateTBE;
+ ra_ackRegionDir;
+ w_sendResponseWBAck;
+ prd_popRegionQueue;
+ }
+
+ transition({B, BR}, CoreUnblock, U) {
+ wa_wakeUpDependents;
+ pu_popUnblockQueue;
+ }
+
+ transition({B, BR}, UnblockWriteThrough, U) {
+ wa_wakeUpDependents;
+ pt_popTriggerQueue;
+ }
+
+ transition(BS_M, MemData, B) {L3TagArrayWrite, L3DataArrayWrite} {
+ mt_writeMemDataToTBE;
+ s_sendResponseS;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pm_popMemQueue;
+ }
+
+ transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ mt_writeMemDataToTBE;
+ m_sendResponseM;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pm_popMemQueue;
+ }
+
+ transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ mt_writeMemDataToTBE;
+ es_sendResponseES;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pm_popMemQueue;
+ }
+
+ transition(BS_PM, MemData, BS_Pm) {} {
+ mt_writeMemDataToTBE;
+ wa_wakeUpDependents;
+ pm_popMemQueue;
+ }
+
+ transition(BM_PM, MemData, BM_Pm){} {
+ mt_writeMemDataToTBE;
+ wa_wakeUpDependents;
+ pm_popMemQueue;
+ }
+
+ transition(B_PM, MemData, B_Pm){} {
+ mt_writeMemDataToTBE;
+ wa_wakeUpDependents;
+ pm_popMemQueue;
+ }
+
+ transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+ s_sendResponseS;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ ptl_popTriggerQueue;
+ }
+
+ transition(BM_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+ m_sendResponseM;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ ptl_popTriggerQueue;
+ }
+
+ transition(B_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+ es_sendResponseES;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ ptl_popTriggerQueue;
+ }
+
+ transition(BS_PM, L3Hit, BS_Pm) {
+ wa_wakeUpDependents;
+ ptl_popTriggerQueue;
+ }
+
+ transition(BM_PM, L3Hit, BM_Pm) {
+ wa_wakeUpDependents;
+ ptl_popTriggerQueue;
+ }
+
+ transition(B_PM, L3Hit, B_Pm) {
+ wa_wakeUpDependents;
+ ptl_popTriggerQueue;
+ }
+
+ transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BP, BI}, CPUPrbResp) {
+ aic_ackInvalidate;
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ ont_checkForCompletionNoTrigger;
+ pr_popResponseQueue;
+ }
+
+ transition({B, B_M, BS_M, BM_M}, {CPUPrbResp, LastCPUPrbResp}) {
+ z_stall;
+ }
+
+ transition({BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {CPUPrbResp, LastCPUPrbResp}) {
+ // recycling because PrbResponse and data come on the same network
+ yy_recycleResponseQueue;
+ }
+
+ transition(U, {CPUPrbResp, LastCPUPrbResp}) {L3TagArrayRead, L3DataArrayWrite} {
+ aic_ackInvalidate;
+ wdt_writeBackDataInvNoTBE;
+ ali_allocateL3BlockNoTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(BL, {CPUPrbResp, LastCPUPrbResp}) {} {
+ aic_ackInvalidate;
+ y_writeProbeDataToTBE;
+ wdi_writeBackDataInv;
+ ali_allocateL3Block;
+ pr_popResponseQueue;
+ }
+
+ transition(BS_PM, LastCPUPrbResp, BS_M) {
+ aic_ackInvalidate;
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ ont_checkForCompletionNoTrigger;
+ pr_popResponseQueue;
+ }
+
+ transition(BS_PM, ProbeAcksComplete, BS_M) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(BM_PM, LastCPUPrbResp, BM_M) {
+ aic_ackInvalidate;
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ ont_checkForCompletionNoTrigger;
+ pr_popResponseQueue;
+ }
+
+ transition(BM_PM, ProbeAcksComplete, BM_M) {} {
+ pt_popTriggerQueue;
+ }
+
+ transition(B_PM, LastCPUPrbResp, B_M) {
+ aic_ackInvalidate;
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ ont_checkForCompletionNoTrigger;
+ pr_popResponseQueue;
+ }
+
+ transition(B_PM, ProbeAcksComplete, B_M){} {
+ pt_popTriggerQueue;
+ }
+
+ transition(BS_Pm, LastCPUPrbResp, B) {
+ aic_ackInvalidate;
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ ont_checkForCompletionNoTrigger;
+ s_sendResponseS;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ ali_allocateL3Block;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+ s_sendResponseS;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ ali_allocateL3Block;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(BM_Pm, LastCPUPrbResp, B) {
+ aic_ackInvalidate;
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ ont_checkForCompletionNoTrigger;
+ m_sendResponseM;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ ali_allocateL3Block;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+ m_sendResponseM;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ ali_allocateL3Block;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(B_Pm, LastCPUPrbResp, B) {
+ aic_ackInvalidate;
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ ont_checkForCompletionNoTrigger;
+ es_sendResponseES;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ ali_allocateL3Block;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+ es_sendResponseES;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ ali_allocateL3Block;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(BP, LastCPUPrbResp, B) {
+ aic_ackInvalidate;
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ ont_checkForCompletionNoTrigger;
+ c_sendResponseCtoD;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(BP, ProbeAcksComplete, B){L3TagArrayWrite, L3DataArrayWrite} {
+ c_sendResponseCtoD;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(BI, LastCPUPrbResp, B) {
+ aic_ackInvalidate;
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ ont_checkForCompletionNoTrigger;
+ wa_wakeUpDependents;
+ wdi_writeBackDataInv;
+ ali_allocateL3Block;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(BI, ProbeAcksComplete, U) {L3TagArrayWrite, L3DataArrayWrite}{
+ wa_wakeUpDependents;
+ wdi_writeBackDataInv;
+ ali_allocateL3Block;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-msg.sm b/src/mem/protocol/MOESI_AMD_Base-Region-msg.sm
new file mode 100644
index 000000000..823933e57
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-Region-msg.sm
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+enumeration(CoherenceRequestType, desc="Coherence Request Types") {
+ // CPU Request Types ONLY
+ RdBlk, desc="Read Blk";
+ RdBlkM, desc="Read Blk Modified";
+ RdBlkS, desc="Read Blk Shared";
+ VicClean, desc="L2 clean eviction";
+ VicDirty, desc="L2 dirty eviction";
+
+ WrCancel, desc="want to cancel WB to Memory"; // should this be here?
+
+ WBApproval, desc="WB Approval";
+
+ // Messages between Dir and R-Dir
+ ForceInv, desc="Send an invalidate for the block";
+ ForceDowngrade, desc="Send a downgrade for the block";
+ Unblock, desc="Used to let the dir know a message has been sunk";
+
+ // Messages between R-Dir and R-Buffer
+ PrivateNotify, desc="Let region buffer know it has private access";
+ SharedNotify, desc="Let region buffer know it has shared access";
+ WbNotify, desc="Let region buffer know it saw its wb request";
+ Downgrade, desc="Force the region buffer to downgrade to shared";
+ // Response to R-Dir (probably should be on a different network, but
+ // I need it to be ordered with respect to requests)
+ InvAck, desc="Let the R-Dir know when the inv has occurred";
+
+ PrivateRequest, desc="R-buf wants the region in private";
+ UpgradeRequest, desc="R-buf wants to upgrade the region from shared to private";
+ SharedRequest, desc="R-buf wants the region in shared (could respond with private)";
+ CleanWbRequest, desc="R-buf wants to deallocate clean region";
+
+ NA, desc="So we don't get segfaults";
+}
+
+enumeration(ProbeRequestType, desc="Probe Request Types") {
+ PrbDowngrade, desc="Probe for Status"; // EtoS, MtoO, StoS
+ PrbInv, desc="Probe to Invalidate";
+
+ // For regions
+ PrbRepl, desc="Force the cache to do a replacement";
+ PrbRegDowngrade, desc="Probe for Status"; // EtoS, MtoO, StoS
+}
+
+
+enumeration(CoherenceResponseType, desc="Coherence Response Types") {
+ NBSysResp, desc="Northbridge response to CPU Rd request";
+ NBSysWBAck, desc="Northbridge response ok to WB";
+ TDSysResp, desc="TCCdirectory response to CPU Rd request";
+ TDSysWBAck, desc="TCCdirectory response ok to WB";
+ TDSysWBNack, desc="TCCdirectory response ok to drop";
+ CPUPrbResp, desc="CPU Probe Response";
+ CPUData, desc="CPU Data";
+ StaleNotif, desc="Notification of stale WBAck, no data to write back";
+ CPUCancelWB, desc="want to cancel WB to Memory";
+ MemData, desc="Data from Memory";
+
+ // for regions
+ PrivateAck, desc="Ack that r-buf received private notify";
+ RegionWbAck, desc="Writeback Ack that r-buf completed deallocation";
+ DirReadyAck, desc="Directory (mem ctrl)<->region dir handshake";
+}
+
+enumeration(CoherenceState, default="CoherenceState_NA", desc="Coherence State") {
+ Modified, desc="Modified";
+ Owned, desc="Owned state";
+ Exclusive, desc="Exclusive";
+ Shared, desc="Shared";
+ NA, desc="NA";
+}
+
+structure(CPURequestMsg, desc="...", interface="Message") {
+ Addr addr, desc="Physical address for this request";
+ Addr DemandAddress, desc="Physical block address for this request";
+ CoherenceRequestType Type, desc="Type of request";
+ DataBlock DataBlk, desc="data for the cache line"; // only for WB
+ bool Dirty, desc="whether WB data is dirty"; // only for WB
+ MachineID Requestor, desc="Node who initiated the request";
+ NetDest Destination, desc="Multicast destination mask";
+ bool Shared, desc="For CPU_WrVicBlk, vic is O not M. For CPU_ClVicBlk, vic is S";
+ MessageSizeType MessageSize, desc="size category of the message";
+ Cycles InitialRequestTime, default="0", desc="time the initial request was sent from the L1Cache";
+ Cycles ForwardRequestTime, default="0", desc="time the dir forwarded the request";
+ Cycles ProbeRequestStartTime, default="0", desc="the time the dir started the probe request";
+ bool DemandRequest, default="false", desc="For profiling purposes";
+
+ NetDest Sharers, desc="Caches that may have a valid copy of the data";
+ bool ForceShared, desc="R-dir knows it is shared, pass on so it sends an S copy, not E";
+ bool Private, default="false", desc="Requestor already has private permissions, no need for dir check";
+ bool CtoDSinked, default="false", desc="True if the previously sent CtoD must have been sunk";
+
+ bool NoAckNeeded, default="false", desc="True if region buffer doesn't need to ack";
+ int Acks, default="0", desc="Acks that the dir (mem ctrl) should expect to receive";
+ CoherenceRequestType OriginalType, default="CoherenceRequestType_NA", desc="Type of request from core fwded through region buffer";
+
+ bool functionalRead(Packet *pkt) {
+ // Only dirty-victim (VicDirty) messages carry the data block
+ if (Type == CoherenceRequestType:VicDirty) {
+ return testAndRead(addr, DataBlk, pkt);
+ }
+
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // No check on message type required since the protocol should
+ // write data to those messages that contain the block
+ return testAndWrite(addr, DataBlk, pkt);
+ }
+}
+
+structure(NBProbeRequestMsg, desc="...", interface="Message") {
+ Addr addr, desc="Physical address for this request";
+ ProbeRequestType Type, desc="probe signal";
+ bool ReturnData, desc="Indicates CPU should return data";
+ NetDest Destination, desc="Node to whom the data is sent";
+ MessageSizeType MessageSize, desc="size category of the message";
+ bool DemandRequest, default="false", desc="demand request, requesting 3-hop transfer";
+ Addr DemandAddress, desc="Demand block address for a region request";
+ MachineID Requestor, desc="Requestor id for 3-hop requests";
+ bool NoAckNeeded, default="false", desc="For short-circuiting acks";
+
+ bool functionalRead(Packet *pkt) {
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // Probe requests carry no data block, so there is nothing to write
+ return false;
+ }
+
+}
+
+structure(TDProbeRequestMsg, desc="...", interface="Message") {
+ Addr addr, desc="Physical address for this request";
+ ProbeRequestType Type, desc="TD_PrbNxtState signal";
+ bool ReturnData, desc="Indicates CPU should return data";
+ bool localCtoD, desc="Indicates CtoD is within the GPU hierarchy (aka TCC subtree)";
+ NetDest Destination, desc="Node to whom the data is sent";
+ MessageSizeType MessageSize, desc="size category of the message";
+ MachineID Sender, desc="Node who sent the data";
+ bool currentOwner, default="false", desc="Is the sender the current owner";
+ bool DoneAck, default="false", desc="Is this a done ack?";
+ bool Dirty, default="false", desc="Was block dirty when evicted";
+ bool wasValid, default="false", desc="Was block valid when evicted";
+ bool valid, default="false", desc="Is block valid";
+ bool validToInvalid, default="false", desc="Block went from valid to invalid";
+
+ bool functionalRead(Packet *pkt) {
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // Probe requests carry no data block, so there is nothing to write
+ return false;
+ }
+}
+
+// Response messages are similar enough to be munged into one type
+structure(ResponseMsg, desc="...", interface="Message") {
+ Addr addr, desc="Physical address for this request";
+ CoherenceResponseType Type, desc="NB Sys Resp or CPU Response to Probe";
+ MachineID Sender, desc="Node who sent the data";
+ NetDest Destination, desc="Node to whom the data is sent";
+ // Begin Used Only By CPU Response
+ DataBlock DataBlk, desc="data for the cache line";
+ bool Hit, desc="probe hit valid line";
+ bool Shared, desc="True if S, or if NB Probe ReturnData==1 && O";
+ bool Dirty, desc="Is the data dirty (different than memory)?";
+ bool Ntsl, desc="indicates probed line will be invalid after probe";
+ bool UntransferredOwner, desc="pending confirmation of ownership change";
+ // End Used Only By CPU Response
+
+ // Begin NB Response Only
+ CoherenceState State, default=CoherenceState_NA, desc="What returned data from NB should be in";
+ bool CtoD, desc="was the originator a CtoD?";
+ // End NB Response Only
+
+ bool NbReqShared, desc="modification of Shared field from initial request, e.g. hit by shared probe";
+
+ MessageSizeType MessageSize, desc="size category of the message";
+ Cycles InitialRequestTime, default="0", desc="time the initial request was sent from the L1Cache";
+ Cycles ForwardRequestTime, default="0", desc="time the dir forwarded the request";
+ Cycles ProbeRequestStartTime, default="0", desc="the time the dir started the probe request";
+ bool DemandRequest, default="false", desc="For profiling purposes";
+
+ bool L3Hit, default="false", desc="Did memory or L3 supply the data?";
+ MachineID OriginalResponder, desc="Mach which wrote the data to the L3";
+
+ bool NotCached, default="false", desc="True when the Region buffer has already evicted the line";
+
+ bool NoAckNeeded, default="false", desc="For short-circuiting acks";
+ bool isValid, default="false", desc="Is acked block valid";
+
+ bool functionalRead(Packet *pkt) {
+ // Only data-carrying responses (CPUData, MemData) hold the data block
+ if (Type == CoherenceResponseType:CPUData ||
+ Type == CoherenceResponseType:MemData) {
+ return testAndRead(addr, DataBlk, pkt);
+ }
+
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // No check on message type required since the protocol should
+ // write data to those messages that contain the block
+ return testAndWrite(addr, DataBlk, pkt);
+ }
+}
+
+structure(UnblockMsg, desc="...", interface="Message") {
+ Addr addr, desc="Physical address for this request";
+ NetDest Destination, desc="Destination (always directory)";
+ MessageSizeType MessageSize, desc="size category of the message";
+ // Fields the region machines read on unblock messages (DoneAck,
+ // wasValid, validToInvalid in the directory and region buffer):
+ bool DoneAck, default="false", desc="Is this a done ack?";
+ bool wasValid, default="false", desc="Was the acked block valid";
+ bool validToInvalid, default="false", desc="Block went from valid to invalid";
+}
+
+enumeration(TriggerType, desc="Trigger Type") {
+ L2_to_L1, desc="L2 to L1 fill";
+ AcksComplete, desc="NB received all needed Acks";
+
+ // For regions
+ InvNext, desc="Invalidate the next block";
+ PrivateAck, desc="Loopback ack for machines with no Region Buffer";
+ AllOutstanding, desc="All outstanding requests have finished";
+ L3Hit, desc="L3 hit in dir";
+
+ // For region directory once the directory is blocked
+ InvRegion, desc="Invalidate region";
+ DowngradeRegion, desc="downgrade region";
+}
+
+enumeration(CacheId, desc="Which Cache in the Core") {
+ L1I, desc="L1 I-cache";
+ L1D0, desc="L1 D-cache cluster 0";
+ L1D1, desc="L1 D-cache cluster 1";
+ NA, desc="Default";
+}
+
+structure(TriggerMsg, desc="...", interface="Message") {
+ Addr addr, desc="Address";
+ TriggerType Type, desc="Type of trigger";
+ CacheId Dest, default="CacheId_NA", desc="Cache to invalidate";
+
+ bool functionalRead(Packet *pkt) {
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // Trigger messages carry no data block, so there is nothing to write
+ return false;
+ }
+
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm b/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm
new file mode 100644
index 000000000..89f7d6fcb
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm
@@ -0,0 +1,1368 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Jason Power
+ */
+
+machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol")
+: CacheMemory *cacheMemory; // stores only region addresses; block size must equal the region size (blocksPerRegion * line size)
+ bool isOnCPU;
+ int blocksPerRegion := 64; // 64 blocks = 4 KB regions (assuming 64 B lines)
+ Cycles toDirLatency := 5; // Latency to fwd requests to directory
+ Cycles toRegionDirLatency := 5; // Latency for requests and acks to directory
+ Cycles nextEvictLatency := 1; // latency added between each block while evicting region
+ bool noTCCdir := "False";
+ int TCC_select_num_bits := 1;
+
+ // From the Cores
+ MessageBuffer * requestFromCore, network="From", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseFromCore, network="From", virtual_network="2", vnet_type="response";
+
+ // Requests to the cores or directory
+ MessageBuffer * requestToNetwork, network="To", virtual_network="0", vnet_type="request";
+
+ // From Region-Dir
+ MessageBuffer * notifyFromRegionDir, network="From", virtual_network="7", vnet_type="request";
+ MessageBuffer * probeFromRegionDir, network="From", virtual_network="8", vnet_type="request";
+
+ // From the directory
+ MessageBuffer * unblockFromDir, network="From", virtual_network="4", vnet_type="unblock";
+
+ // To the region-Dir
+ MessageBuffer * responseToRegDir, network="To", virtual_network="2", vnet_type="response";
+
+ MessageBuffer * triggerQueue;
+{
+
+ // States
+ state_declaration(State, desc="Region states", default="RegionBuffer_State_NP") {
+ NP, AccessPermission:Invalid, desc="Not present in region directory";
+ P, AccessPermission:Invalid, desc="Region is private to the cache";
+ S, AccessPermission:Invalid, desc="Region is possibly shared with others";
+
+ NP_PS, AccessPermission:Invalid, desc="Intermediate state waiting for notify from r-dir";
+ S_P, AccessPermission:Invalid, desc="Intermediate state while upgrading region";
+
+ P_NP, AccessPermission:Invalid, desc="Intermediate state while evicting all lines in region";
+ P_S, AccessPermission:Invalid, desc="Intermediate state while downgrading all lines in region";
+
+ S_NP_PS, AccessPermission:Invalid, desc="Got an inv in S_P; waiting for all inv acks, then going to NP_PS since the write is already out there";
+ P_NP_NP, AccessPermission:Invalid, desc="Evicting region on repl, then got an inv. Need to re-evict";
+
+ P_NP_O, AccessPermission:Invalid, desc="Waiting for all outstanding requests";
+ P_S_O, AccessPermission:Invalid, desc="Waiting for all outstanding requests";
+ S_O, AccessPermission:Invalid, desc="Waiting for all outstanding requests";
+ S_NP_PS_O, AccessPermission:Invalid, desc="Waiting for all outstanding requests";
+
+ SS_P, AccessPermission:Invalid, desc="Waiting for CPU write that we know is there";
+
+ P_NP_W, AccessPermission:Invalid, desc="Waiting for writeback ack";
+
+ NP_W, AccessPermission:Invalid, desc="Got a done ack before request, waiting for that victim";
+ }
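+ // Naming convention: compound states read as transitions, e.g. P_NP is
+ // "moving from private to not-present"; a trailing _O means outstanding
+ // requests are draining first, and _W means a writeback ack is pending.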
+
+ enumeration(Event, desc="Region directory events") {
+ CPURead, desc="Access from CPU core";
+ CPUWrite, desc="Access from CPU core";
+ CPUWriteback, desc="Writeback request from CPU core";
+
+ ReplRegion, desc="Start a replace on a region";
+
+ PrivateNotify, desc="Update entry to private state";
+ SharedNotify, desc="Update entry to shared state";
+ WbNotify, desc="Writeback notification received";
+ InvRegion, desc="Start invalidating a region";
+ DowngradeRegion, desc="Start downgrading a region";
+
+ InvAck, desc="Ack from core";
+
+ DoneAck, desc="Ack from core that request has finished";
+ AllOutstanding, desc="All outstanding requests have now finished";
+
+ Evict, desc="Loopback to evict each block";
+ LastAck_PrbResp, desc="Done evicting all the blocks, got the last ack from core, now respond to region dir";
+ LastAck_CleanWb, desc="Done evicting all the blocks, got the last ack from core, now start clean writeback (note the dir has already been updated)";
+
+ StallAccess, desc="Wait for the done ack on the address before proceeding";
+ StallDoneAck, desc="Wait for the access on the address before proceeding";
+
+ StaleRequest, desc="Got a stale victim from the cache, fwd it without incrementing outstanding";
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ TagArrayRead, desc="Read the tag array";
+ TagArrayWrite, desc="Write the tag array";
+ }
+
+ structure(BoolVec, external="yes") {
+ bool at(int);
+ void resize(int);
+ void clear();
+ int size();
+ }
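+ // BoolVec is implemented on the C++ side (external="yes"), presumably as
+ // a std::vector<bool>-style container indexed by block offset within the
+ // region.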
+
+ structure(Entry, desc="Region entry", interface="AbstractCacheEntry") {
+ Addr addr, desc="Base address of this region";
+ State RegionState, desc="Region state";
+ DataBlock DataBlk, desc="Data for the block (always empty in region buffer)";
+ BoolVec ValidBlocks, desc="A vector to keep track of valid blocks";
+ int NumValidBlocks, desc="Number of trues in ValidBlocks to avoid iterating";
+ BoolVec UsedBlocks, desc="A vector to keep track of blocks ever valid";
+ bool dirty, desc="Dirty as best known by the region buffer";
+ // This is needed so we don't ack an invalidate until all requests are ordered
+ int NumOutstandingReqs, desc="Total outstanding private/shared requests";
+ BoolVec OutstandingReqs, desc="Blocks that have outstanding private/shared requests";
+ bool MustDowngrade, desc="Set when we got a downgrade before the shd or pvt permissions";
+ Cycles ProbeRequestTime, default="Cycles(0)", desc="Time region dir started the probe";
+ Cycles InitialRequestTime, default="Cycles(0)", desc="Time message was sent to region dir";
+ bool MsgSentToDir, desc="True if the current request required a message to the dir";
+ bool clearOnDone, default="false", desc="clear valid bit when request completes";
+ Addr clearOnDoneAddr, desc="clear valid bit when request completes";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ //int NumValidBlocks, desc="Number of blocks valid so we don't have to count a BoolVec";
+ BoolVec ValidBlocks, desc="A vector to keep track of valid blocks";
+ bool AllAcksReceived, desc="Got all necessary acks from dir";
+ bool DoneEvicting, desc="Done iterating through blocks checking for valids";
+ BoolVec AcksReceived, desc="Received acks for these blocks";
+ bool SendAck, desc="If true, send an ack to the r-dir at end of inv";
+ ProbeRequestType MsgType, desc="Type of message to send while 'evicting' ";
+ int NumOutstandingReqs, desc="Total outstanding private/shared requests";
+ BoolVec OutstandingReqs, desc="Blocks that have outstanding private/shared requests";
+ MachineID Requestor, desc="Requestor for three hop transactions";
+ bool DemandRequest, default="false", desc="Associated with a demand request";
+ Addr DemandAddress, desc="Address for the demand request";
+ bool DoneAckReceived, default="false", desc="True if the done ack arrived before the message";
+ Addr DoneAckAddr, desc="Address of the done ack received early";
+ int OutstandingThreshold, desc="Number of outstanding requests to trigger AllOutstanding on";
+
+ ProbeRequestType NewMsgType, desc="Type of message to send while 'evicting' ";
+ MachineID NewRequestor, desc="Requestor for three hop transactions";
+ bool NewDemandRequest, default="false", desc="Associated with a demand request";
+ Addr NewDemandAddress, desc="Address for the demand request";
+ bool dirty, desc="Dirty as best known by the region buffer";
+ bool AllOutstandingTriggered, default="false", desc="bit for only one all outstanding";
+ int OutstandingAcks, default="0", desc="number of acks to wait for";
+ }
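+ // The New* fields appear to buffer a second probe that arrives while one
+ // is still being processed (cf. P_NP_NP, where an inv lands mid-replacement
+ // and the region must be re-evicted).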
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ // Stores only region addresses
+ TBETable TBEs, template="<RegionBuffer_TBE>", constructor="m_number_of_TBEs";
+ int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ int blockBits, default="RubySystem::getBlockSizeBits()";
+ int blockBytes, default="RubySystem::getBlockSizeBytes()";
+ int regionBits, default="log2(m_blocksPerRegion)";
+
+ // Functions
+
+ int getRegionOffset(Addr addr) {
+ if (blocksPerRegion > 1) {
+ Addr offset := bitSelect(addr, blockBits, regionBits+blockBits-1);
+ int ret := addressToInt(offset);
+ assert(ret < blocksPerRegion);
+ return ret;
+ } else {
+ return 0;
+ }
+ }
+
+ Addr getRegionBase(Addr addr) {
+ return maskLowOrderBits(addr, blockBits+regionBits);
+ }
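+ // Worked example (assuming the default 64 B lines, so blockBits = 6, and
+ // blocksPerRegion = 64, so regionBits = 6): for addr = 0x12FC0,
+ // getRegionOffset() selects bits [11:6] = 63 (the last block of the
+ // region) and getRegionBase() clears bits [11:0], returning 0x12000.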
+
+ Addr getNextBlock(Addr addr) {
+ Addr a := addr;
+ return makeNextStrideAddress(a, 1);
+ }
+
+ MachineID getPeer(MachineID mach, Addr address) {
+ if (isOnCPU) {
+ return createMachineID(MachineType:CorePair, intToID(0));
+ } else if (noTCCdir) {
+ return mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+ } else {
+ return createMachineID(MachineType:TCCdir, intToID(0));
+ }
+ }
+
+ bool isOutstanding(TBE tbe, Entry cache_entry, Addr addr) {
+ if (is_valid(tbe) && tbe.OutstandingReqs.size() > 0) {
+ DPRINTF(RubySlicc, " outstanding tbe reqs %s %s %d %d\n",
+ tbe.OutstandingReqs, addr, getRegionOffset(addr),
+ tbe.OutstandingReqs.at(getRegionOffset(addr)));
+ return tbe.OutstandingReqs.at(getRegionOffset(addr));
+ } else if (is_valid(cache_entry)) {
+ DPRINTF(RubySlicc, " outstanding cache reqs %s %s %d %d\n",
+ cache_entry.OutstandingReqs, addr, getRegionOffset(addr),
+ cache_entry.OutstandingReqs.at(getRegionOffset(addr)));
+ return cache_entry.OutstandingReqs.at(getRegionOffset(addr));
+ } else {
+ return false;
+ }
+ }
+
+ bool isOnGPU() {
+ if (isOnCPU) {
+ return false;
+ }
+ return true;
+ }
+
+ bool isRead(CoherenceRequestType type) {
+ return (type == CoherenceRequestType:RdBlk || type == CoherenceRequestType:RdBlkS ||
+ type == CoherenceRequestType:VicClean);
+ }
+
+ bool presentOrAvail(Addr addr) {
+ return cacheMemory.isTagPresent(getRegionBase(addr)) || cacheMemory.cacheAvail(getRegionBase(addr));
+ }
+
+ // Returns a region entry!
+ Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+ return static_cast(Entry, "pointer", cacheMemory.lookup(getRegionBase(addr)));
+ }
+
+ TBE getTBE(Addr addr), return_by_pointer="yes" {
+ return TBEs.lookup(getRegionBase(addr));
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ return getCacheEntry(getRegionBase(addr)).DataBlk;
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if (is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.RegionState;
+ }
+ return State:NP;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+ if (is_valid(cache_entry)) {
+ cache_entry.RegionState := state;
+ }
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := getTBE(addr);
+ if(is_valid(tbe)) {
+ return RegionBuffer_State_to_permission(tbe.TBEState);
+ }
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return RegionBuffer_State_to_permission(cache_entry.RegionState);
+ }
+ return AccessPermission:NotPresent;
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ functionalMemoryRead(pkt);
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ if (functionalMemoryWrite(pkt)) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(RegionBuffer_State_to_permission(state));
+ }
+ }
+
+ void recordRequestType(RequestType stat, Addr addr) {
+ if (stat == RequestType:TagArrayRead) {
+ cacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (stat == RequestType:TagArrayWrite) {
+ cacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:TagArrayRead) {
+ return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+
+ // Overloaded outgoing request network for both probes to cores and requests
+ // to the directory.
+ // Fix Me: These forwarded requests need to be on a separate virtual channel
+ // to avoid deadlock!
+ out_port(requestNetwork_out, CPURequestMsg, requestToNetwork);
+ out_port(probeNetwork_out, NBProbeRequestMsg, requestToNetwork);
+
+ out_port(responseNetwork_out, ResponseMsg, responseToRegDir);
+
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=4) {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := getTBE(in_msg.addr);
+ DPRINTF(RubySlicc, "trigger msg: %s (%s)\n", in_msg, getRegionBase(in_msg.addr));
+ assert(is_valid(tbe));
+ if (in_msg.Type == TriggerType:AcksComplete) {
+ if (tbe.SendAck) {
+ trigger(Event:LastAck_PrbResp, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:LastAck_CleanWb, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == TriggerType:AllOutstanding) {
+ trigger(Event:AllOutstanding, in_msg.addr, cache_entry, tbe);
+ } else {
+ assert(in_msg.Type == TriggerType:InvNext);
+ trigger(Event:Evict, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
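+ // The rank values appear to order servicing across the in_ports: internal
+ // triggers (rank 4) ahead of dir unblocks/acks (3), region-dir probes (2),
+ // region-dir notifies (1), and finally new core requests (0).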
+
+ in_port(unblockNetwork_in, UnblockMsg, unblockFromDir, rank=3) {
+ if (unblockNetwork_in.isReady(clockEdge())) {
+ peek(unblockNetwork_in, UnblockMsg) {
+ TBE tbe := getTBE(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.DoneAck) {
+ if (isOutstanding(tbe, cache_entry, in_msg.addr)) {
+ trigger(Event:DoneAck, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:StallDoneAck, in_msg.addr, cache_entry, tbe);
+ }
+ } else {
+ assert(is_valid(tbe));
+ trigger(Event:InvAck, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ }
+ }
+
+ in_port(probeNetwork_in, NBProbeRequestMsg, probeFromRegionDir, rank=2) {
+ if (probeNetwork_in.isReady(clockEdge())) {
+ peek(probeNetwork_in, NBProbeRequestMsg) {
+ TBE tbe := getTBE(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ assert(getRegionBase(in_msg.addr) == in_msg.addr);
+ if (in_msg.Type == ProbeRequestType:PrbInv) {
+ trigger(Event:InvRegion, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+ trigger(Event:DowngradeRegion, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unknown probe message\n");
+ }
+ }
+ }
+ }
+
+ in_port(notifyNetwork_in, CPURequestMsg, notifyFromRegionDir, rank=1) {
+ if (notifyNetwork_in.isReady(clockEdge())) {
+ peek(notifyNetwork_in, CPURequestMsg) {
+ TBE tbe := getTBE(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ //Fix Me...add back in: assert(is_valid(cache_entry));
+ if (in_msg.Type == CoherenceRequestType:WbNotify) {
+ trigger(Event:WbNotify, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:SharedNotify) {
+ trigger(Event:SharedNotify, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:PrivateNotify) {
+ trigger(Event:PrivateNotify, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unknown notify message\n");
+ }
+ }
+ }
+ }
+
+ // In from cores
+ // NOTE: We get the cache / TBE entry based on the region address,
+ // but pass the block address to the actions
+ in_port(requestNetwork_in, CPURequestMsg, requestFromCore, rank=0) {
+ if (requestNetwork_in.isReady(clockEdge())) {
+ peek(requestNetwork_in, CPURequestMsg) {
+ TBE tbe := getTBE(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (is_valid(tbe) && tbe.DoneAckReceived && tbe.DoneAckAddr == in_msg.addr) {
+ DPRINTF(RubySlicc, "Stale/Stall request %s\n", in_msg.Type);
+ if (in_msg.Type == CoherenceRequestType:VicDirty || in_msg.Type == CoherenceRequestType:VicClean )
+ {
+ trigger(Event:StaleRequest, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:StallAccess, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (isOutstanding(tbe, cache_entry, in_msg.addr)) {
+ DPRINTF(RubySlicc, "Stall outstanding request %s\n", in_msg.Type);
+ trigger(Event:StallAccess, in_msg.addr, cache_entry, tbe);
+ } else {
+ if (presentOrAvail(in_msg.addr)) {
+ if (in_msg.Type == CoherenceRequestType:RdBlkM ) {
+ trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:WriteThrough ) {
+ trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:Atomic ) {
+ trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+ } else {
+ if (in_msg.Type == CoherenceRequestType:VicDirty ||
+ in_msg.Type == CoherenceRequestType:VicClean) {
+ trigger(Event:CPUWriteback, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:CPURead, in_msg.addr, cache_entry, tbe);
+ }
+ }
+ } else {
+ Addr victim := cacheMemory.cacheProbe(getRegionBase(in_msg.addr));
+ TBE victim_tbe := getTBE(victim);
+ Entry victim_entry := getCacheEntry(victim);
+ DPRINTF(RubySlicc, "Replacing region %s for %s(%s)\n", victim, in_msg.addr, getRegionBase(in_msg.addr));
+ trigger(Event:ReplRegion, victim, victim_entry, victim_tbe);
+ }
+ }
+ }
+ }
+ }
+
+ // Actions
+ action(f_fwdReqToDir, "f", desc="Forward CPU request to directory") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) {
+ out_msg.addr := in_msg.addr;
+ out_msg.Type := in_msg.Type;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Dirty := in_msg.Dirty;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.Destination.add(map_Address_to_Directory(in_msg.addr));
+ out_msg.Shared := in_msg.Shared;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.Private := true;
+ out_msg.InitialRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := curCycle();
+ if (getState(tbe, cache_entry, address) == State:S) {
+ out_msg.ForceShared := true;
+ }
+ DPRINTF(RubySlicc, "Fwd: %s\n", out_msg);
+ //assert(getState(tbe, cache_entry, address) == State:P || getState(tbe, cache_entry, address) == State:S);
+ if (getState(tbe, cache_entry, address) == State:NP_W) {
+ APPEND_TRANSITION_COMMENT(" fwding stale request: ");
+ APPEND_TRANSITION_COMMENT(out_msg.Type);
+ }
+ }
+ }
+ }
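+ // Requests are forwarded with Private := true because the region buffer
+ // already holds region-level permission, so the block directory can skip
+ // its sharing check; ForceShared is set when the region is only shared so
+ // the directory hands out an S copy rather than E.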
+
+ action(u_updateRegionEntry, "u", desc="Update the entry for profiling") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ if (is_valid(cache_entry)) {
+ if (in_msg.CtoDSinked == false) {
+ APPEND_TRANSITION_COMMENT(" incr outstanding ");
+ cache_entry.NumOutstandingReqs := 1 + cache_entry.NumOutstandingReqs;
+ assert(cache_entry.OutstandingReqs.at(getRegionOffset(address)) == false);
+ cache_entry.OutstandingReqs.at(getRegionOffset(address)) := true;
+ assert(cache_entry.NumOutstandingReqs == countBoolVec(cache_entry.OutstandingReqs));
+ } else {
+ APPEND_TRANSITION_COMMENT(" NOT incr outstanding ");
+ assert(in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:RdBlkS);
+ }
+ APPEND_TRANSITION_COMMENT(cache_entry.NumOutstandingReqs);
+ if (in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:Atomic ||
+ in_msg.Type == CoherenceRequestType:WriteThrough )
+ {
+ cache_entry.dirty := true;
+ }
+ if (in_msg.Type == CoherenceRequestType:VicDirty ||
+ in_msg.Type == CoherenceRequestType:VicClean) {
+ DPRINTF(RubySlicc, "Got %s for addr %s\n", in_msg.Type, address);
+ //assert(cache_entry.ValidBlocks.at(getRegionOffset(address)));
+ // can in fact be inv if core got an inv after a vicclean before it got here
+ if (cache_entry.ValidBlocks.at(getRegionOffset(address))) {
+ cache_entry.clearOnDone := true;
+ cache_entry.clearOnDoneAddr := address;
+ //cache_entry.ValidBlocks.at(getRegionOffset(address)) := false;
+ //cache_entry.NumValidBlocks := cache_entry.NumValidBlocks - 1;
+ }
+ } else {
+ if (cache_entry.ValidBlocks.at(getRegionOffset(address)) == false) {
+ cache_entry.NumValidBlocks := cache_entry.NumValidBlocks + 1;
+ }
+ DPRINTF(RubySlicc, "before valid addr %s bits %s\n",
+ in_msg.Type, address, cache_entry.ValidBlocks);
+ cache_entry.ValidBlocks.at(getRegionOffset(address)) := true;
+ DPRINTF(RubySlicc, "after valid addr %s bits %s\n",
+ in_msg.Type, address, cache_entry.ValidBlocks);
+ cache_entry.UsedBlocks.at(getRegionOffset(address)) := true;
+ }
+ assert(cache_entry.NumValidBlocks <= blocksPerRegion);
+ assert(cache_entry.NumValidBlocks >= 0);
+ APPEND_TRANSITION_COMMENT(" valid blocks ");
+ APPEND_TRANSITION_COMMENT(cache_entry.ValidBlocks);
+ } else {
+ error("This shouldn't happen anymore I think");
+ //tbe.ValidBlocks.at(getRegionOffest(address)) := true;
+ assert(getState(tbe, cache_entry, address) == State:P_NP);
+ }
+ }
+ }
+
+ action(uw_updatePossibleWriteback, "uw", desc="writeback request complete") {
+ peek(unblockNetwork_in, UnblockMsg) {
+ if (is_valid(cache_entry) && in_msg.validToInvalid &&
+ cache_entry.clearOnDone && cache_entry.clearOnDoneAddr == address) {
+ DPRINTF(RubySlicc, "I have no idea what is going on here\n");
+ cache_entry.ValidBlocks.at(getRegionOffset(address)) := false;
+ cache_entry.NumValidBlocks := cache_entry.NumValidBlocks - 1;
+ cache_entry.clearOnDone := false;
+ }
+ }
+ }
+
+
+ action(rp_requestPrivate, "rp", desc="Send private request to r-dir") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ // No need to send acks on replacements
+ assert(is_invalid(tbe));
+ enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) {
+ out_msg.addr := address; // use the actual address so the demand request can be fulfilled
+ out_msg.DemandAddress := address;
+ out_msg.Type := CoherenceRequestType:PrivateRequest;
+ out_msg.OriginalType := in_msg.Type;
+ out_msg.Requestor := machineID;
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.InitialRequestTime := curCycle();
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(map_Address_to_RegionDir(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ DPRINTF(RubySlicc, "Private request %s\n", out_msg);
+ }
+ cache_entry.ProbeRequestTime := curCycle();
+ cache_entry.MsgSentToDir := true;
+ APPEND_TRANSITION_COMMENT(getRegionBase(address));
+ }
+ }
+
+ action(ru_requestUpgrade, "ru", desc="Send upgrade request to r-dir") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ // No need to send acks on replacements
+ assert(is_invalid(tbe));
+ enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) {
+ out_msg.addr := address; // use the actual address so the demand request can be fulfilled
+ out_msg.Type := CoherenceRequestType:UpgradeRequest;
+ out_msg.OriginalType := in_msg.Type;
+ out_msg.Requestor := machineID;
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.InitialRequestTime := curCycle();
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(map_Address_to_RegionDir(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ cache_entry.ProbeRequestTime := curCycle();
+ cache_entry.MsgSentToDir := true;
+ APPEND_TRANSITION_COMMENT(getRegionBase(address));
+ }
+ }
+
+ action(rw_requestWriteback, "rq", desc="Send writeback request") {
+ // No need to send acks on replacements
+ enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) {
+ out_msg.addr := getRegionBase(address); // use the region base address; this writes back the whole region
+ out_msg.Type := CoherenceRequestType:CleanWbRequest;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.Dirty := tbe.dirty;
+ APPEND_TRANSITION_COMMENT(getRegionBase(address));
+ }
+ }
+
+ action(rs_requestShared, "rs", desc="Send shared request to r-dir") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ // No need to send acks on replacements
+ assert(is_invalid(tbe));
+ enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) {
+ out_msg.addr := address; // use the actual address so the demand request can be fulfilled
+ out_msg.Type := CoherenceRequestType:SharedRequest;
+ out_msg.OriginalType := in_msg.Type;
+ out_msg.Requestor := machineID;
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.InitialRequestTime := curCycle();
+ // will this always be ok? probably not for multisocket
+ out_msg.Destination.add(map_Address_to_RegionDir(address));
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ }
+ cache_entry.ProbeRequestTime := curCycle();
+ cache_entry.MsgSentToDir := true;
+ APPEND_TRANSITION_COMMENT(getRegionBase(address));
+ }
+ }
+
+ action(ai_ackRegionInv, "ai", desc="Send ack to r-dir on region inv if tbe says so") {
+ // No need to send acks on replacements
+ assert(is_valid(tbe));
+ enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(ad_ackDirectory, "ad", desc="send probe response to directory") {
+ if (noTCCdir && tbe.MsgType == ProbeRequestType:PrbDowngrade && isOnGPU()) { //VIPER TCC doesn't understand PrbShrData
+ assert(tbe.DemandRequest); //So, let RegionBuffer take care of sending back ack
+ enqueue(responseNetwork_out, ResponseMsg, toDirLatency) {
+ out_msg.addr := tbe.DemandAddress;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := getPeer(machineID,address);
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.Dirty := false; // only true if sending back data i think
+ out_msg.Hit := false;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.NoAckNeeded := true;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+
+ action(aie_ackRegionExclusiveInv, "aie", desc="Send ack to r-dir on region inv, marking the region as no longer cached") {
+ // No need to send acks on replacements
+ assert(is_valid(tbe));
+ enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.NotCached := true;
+ out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.Dirty := tbe.dirty;
+ }
+ }
+
+ action(ain_ackRegionInvNow, "ain", desc="Send ack to r-dir on region inv") {
+ enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(aine_ackRegionInvExclusiveNow, "aine", desc="Send ack to r-dir on region inv with exclusive permission") {
+ enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceResponseType:CPUPrbResp;
+ out_msg.Sender := machineID;
+ out_msg.NotCached := true;
+ out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(ap_ackPrivateNotify, "ap", desc="Send ack to r-dir on private notify") {
+ enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceResponseType:PrivateAck;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+
+ action(aw_ackWbNotify, "aw", desc="Send ack to r-dir on writeback notify") {
+ peek(notifyNetwork_in, CPURequestMsg) {
+ if (in_msg.NoAckNeeded == false) {
+ enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceResponseType:RegionWbAck;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ }
+ }
+ }
+ }
+
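+ // Region eviction walks the region one block at a time: e_evictCurrent
+ // probes the block at the current offset, en_enqueueNextEvict strides to
+ // the next block via the trigger queue, and ra_receiveAck counts probe
+ // acks until LastAck fires. The demand block, if any, is probed
+ // separately by ed_evictDemand.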
+ action(e_evictCurrent, "e", desc="Evict this block in the region") {
+ // send force invalidate message to directory to invalidate this block
+ // must invalidate all blocks since region buffer could have privatized it
+ if (tbe.ValidBlocks.at(getRegionOffset(address)) &&
+ (tbe.DemandRequest == false || tbe.DemandAddress != address)) {
+ DPRINTF(RubySlicc, "trying to evict address %s (base: %s, offset: %d)\n", address, getRegionBase(address), getRegionOffset(address));
+ DPRINTF(RubySlicc, "tbe valid blocks %s\n", tbe.ValidBlocks);
+
+ enqueue(probeNetwork_out, NBProbeRequestMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := tbe.MsgType;
+ out_msg.ReturnData := true;
+ if (address == tbe.DemandAddress) {
+ out_msg.DemandRequest := true;
+ }
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.add(getPeer(machineID,address));
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ APPEND_TRANSITION_COMMENT(" current ");
+ APPEND_TRANSITION_COMMENT(tbe.ValidBlocks.at(getRegionOffset(address)));
+ tbe.AllAcksReceived := false;
+ } else {
+ DPRINTF(RubySlicc, "Not evicting demand %s\n", address);
+ }
+ }
+
+ action(ed_evictDemand, "ed", desc="Evict the demand request if it's valid") {
+ if (noTCCdir && tbe.MsgType == ProbeRequestType:PrbDowngrade && isOnGPU()) {
+ tbe.OutstandingAcks := 0;
+ tbe.AllAcksReceived := true;
+ tbe.DoneEvicting := true;
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.Type := TriggerType:AcksComplete;
+ out_msg.addr := getRegionBase(address);
+ }
+ } else if (tbe.DemandRequest) {
+ enqueue(probeNetwork_out, NBProbeRequestMsg, 1) {
+ out_msg.addr := tbe.DemandAddress;
+ out_msg.Type := tbe.MsgType;
+ out_msg.ReturnData := true;
+ out_msg.DemandRequest := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.add(getPeer(machineID,address));
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ tbe.AllAcksReceived := false;
+ }
+ if (tbe.ValidBlocks.at(getRegionOffset(tbe.DemandAddress)) == false) {
+ tbe.OutstandingAcks := tbe.OutstandingAcks + 1;
+ }
+ APPEND_TRANSITION_COMMENT("Evicting demand ");
+ APPEND_TRANSITION_COMMENT(tbe.DemandAddress);
+ }
+ APPEND_TRANSITION_COMMENT("waiting acks ");
+ APPEND_TRANSITION_COMMENT(tbe.OutstandingAcks);
+ }
+
+ action(adp_AckDemandProbe, "fp", desc="forward demand probe even if we know that the core is invalid") {
+ peek(probeNetwork_in, NBProbeRequestMsg) {
+ if (in_msg.DemandRequest) {
+ enqueue(responseNetwork_out, ResponseMsg, toDirLatency) {
+ out_msg.addr := in_msg.DemandAddress;
+ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
+ out_msg.Sender := getPeer(machineID,address);
+ out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+ out_msg.Dirty := false; // only true if sending back data i think
+ out_msg.Hit := false;
+ out_msg.Ntsl := false;
+ out_msg.State := CoherenceState:NA;
+ out_msg.NoAckNeeded := true;
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+ }
+ }
+
+ action(en_enqueueNextEvict, "en", desc="Queue evict the next block in the region") {
+ // increment in_msg.addr by blockSize bytes and enqueue on triggerPort
+ // Only enqueue if the next address doesn't overrun the region bound
+ if (getRegionBase(getNextBlock(address)) == getRegionBase(address)) {
+ enqueue(triggerQueue_out, TriggerMsg, nextEvictLatency) {
+ out_msg.Type := TriggerType:InvNext;
+ out_msg.addr := getNextBlock(address);
+ }
+ } else {
+ tbe.DoneEvicting := true;
+ DPRINTF(RubySlicc, "Done evicing region %s\n", getRegionBase(address));
+ DPRINTF(RubySlicc, "Waiting for %s acks\n", tbe.OutstandingAcks);
+ if (tbe.AllAcksReceived == true) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.Type := TriggerType:AcksComplete;
+ out_msg.addr := getRegionBase(address);
+ }
+ }
+ }
+ }
+
+ action(ef_enqueueFirstEvict, "ef", desc="Queue the first block in the region to be evicted") {
+ if (tbe.DoneEvicting == false) {
+ enqueue(triggerQueue_out, TriggerMsg, nextEvictLatency) {
+ out_msg.Type := TriggerType:InvNext;
+ out_msg.addr := getRegionBase(address);
+ }
+ }
+ }
+
+ action(ra_receiveAck, "ra", desc="Mark TBE entry as received this ack") {
+ DPRINTF(RubySlicc, "received ack for %s reg: %s vec: %s pos: %d\n",
+ address, getRegionBase(address), tbe.ValidBlocks, getRegionOffset(address));
+ peek(unblockNetwork_in, UnblockMsg) {
+ //
+ // Note the tbe ValidBlock vec will be a conservative list of the
+ // valid blocks since the cache entry ValidBlock vec is set on the
+ // request
+ //
+ if (in_msg.wasValid) {
+ assert(tbe.ValidBlocks.at(getRegionOffset(address)));
+ }
+ }
+ tbe.OutstandingAcks := tbe.OutstandingAcks - 1;
+ tbe.AcksReceived.at(getRegionOffset(address)) := true;
+ assert(tbe.OutstandingAcks >= 0);
+ if (tbe.OutstandingAcks == 0) {
+ tbe.AllAcksReceived := true;
+ if (tbe.DoneEvicting) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.Type := TriggerType:AcksComplete;
+ out_msg.addr := getRegionBase(address);
+ }
+ }
+ }
+
+ APPEND_TRANSITION_COMMENT(getRegionBase(address));
+ APPEND_TRANSITION_COMMENT(" Acks left receive ");
+ APPEND_TRANSITION_COMMENT(tbe.OutstandingAcks);
+ }
+
+ action(do_decrementOutstanding, "do", desc="Decrement outstanding requests") {
+ APPEND_TRANSITION_COMMENT(" decr outstanding ");
+ if (is_valid(cache_entry)) {
+ cache_entry.NumOutstandingReqs := cache_entry.NumOutstandingReqs - 1;
+ assert(cache_entry.OutstandingReqs.at(getRegionOffset(address)));
+ cache_entry.OutstandingReqs.at(getRegionOffset(address)) := false;
+ assert(cache_entry.NumOutstandingReqs >= 0);
+ assert(cache_entry.NumOutstandingReqs == countBoolVec(cache_entry.OutstandingReqs));
+ APPEND_TRANSITION_COMMENT(cache_entry.NumOutstandingReqs);
+ }
+ if (is_valid(tbe)) {
+ tbe.NumOutstandingReqs := tbe.NumOutstandingReqs - 1;
+ assert(tbe.OutstandingReqs.at(getRegionOffset(address)));
+ tbe.OutstandingReqs.at(getRegionOffset(address)) := false;
+ assert(tbe.NumOutstandingReqs >= 0);
+ assert(tbe.NumOutstandingReqs == countBoolVec(tbe.OutstandingReqs));
+ APPEND_TRANSITION_COMMENT(tbe.NumOutstandingReqs);
+ }
+ }
+
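+ // Probes must wait until the region's outstanding demand requests drain.
+ // co_checkOutstanding fires an AllOutstanding trigger once the count
+ // reaches the threshold (1 rather than 0 in S_P, where one request is
+ // legitimately in flight to the region directory; see
+ // so_setOutstandingCheckOne).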
+ action(co_checkOutstanding, "co", desc="check if there are no more outstanding requests") {
+ assert(is_valid(tbe));
+ if ((tbe.NumOutstandingReqs <= tbe.OutstandingThreshold) &&
+ (tbe.AllOutstandingTriggered == false)) {
+ APPEND_TRANSITION_COMMENT(" no more outstanding: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumOutstandingReqs);
+ APPEND_TRANSITION_COMMENT(tbe.OutstandingThreshold);
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.Type := TriggerType:AllOutstanding;
+ if (tbe.DemandRequest) {
+ out_msg.addr := tbe.DemandAddress;
+ } else {
+ out_msg.addr := getRegionBase(address);
+ }
+ DPRINTF(RubySlicc, "co enqueuing %s\n", out_msg);
+ tbe.AllOutstandingTriggered := true;
+ }
+ } else {
+ APPEND_TRANSITION_COMMENT(" still more outstanding ");
+ }
+ }
+
+ action(ro_resetAllOutstanding, "ro", desc="Reset all outstanding") {
+ tbe.AllOutstandingTriggered := false;
+ }
+
+ action(so_setOutstandingCheckOne, "so", desc="Check outstanding is waiting for 1, not 0") {
+ // Need this for S_P because one request is outstanding between here and r-dir
+ tbe.OutstandingThreshold := 1;
+ }
+
+ action(a_allocateRegionEntry, "a", desc="Allocate a new entry") {
+ set_cache_entry(cacheMemory.allocate(getRegionBase(address), new Entry));
+ cache_entry.ValidBlocks.clear();
+ cache_entry.ValidBlocks.resize(blocksPerRegion);
+ cache_entry.UsedBlocks.clear();
+ cache_entry.UsedBlocks.resize(blocksPerRegion);
+ cache_entry.dirty := false;
+ cache_entry.NumOutstandingReqs := 0;
+ cache_entry.OutstandingReqs.clear();
+ cache_entry.OutstandingReqs.resize(blocksPerRegion);
+ }
+
+ action(d_deallocateRegionEntry, "d", desc="Deallocate region entry") {
+ cacheMemory.deallocate(getRegionBase(address));
+ unset_cache_entry();
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ TBEs.allocate(getRegionBase(address));
+ set_tbe(getTBE(address));
+ tbe.OutstandingAcks := 0;
+ tbe.AllAcksReceived := true; // starts true since the region could be empty
+ tbe.DoneEvicting := false;
+ tbe.AcksReceived.clear();
+ tbe.AcksReceived.resize(blocksPerRegion);
+ tbe.SendAck := false;
+ tbe.OutstandingThreshold := 0;
+ if (is_valid(cache_entry)) {
+ tbe.NumOutstandingReqs := cache_entry.NumOutstandingReqs;
+ tbe.OutstandingReqs := cache_entry.OutstandingReqs;
+ assert(tbe.NumOutstandingReqs == countBoolVec(tbe.OutstandingReqs));
+ tbe.dirty := cache_entry.dirty;
+ tbe.ValidBlocks := cache_entry.ValidBlocks;
+ tbe.OutstandingAcks := countBoolVec(tbe.ValidBlocks);
+ APPEND_TRANSITION_COMMENT(" tbe valid blocks ");
+ APPEND_TRANSITION_COMMENT(tbe.ValidBlocks);
+ APPEND_TRANSITION_COMMENT(" cache valid blocks ");
+ APPEND_TRANSITION_COMMENT(cache_entry.ValidBlocks);
+ } else {
+ tbe.dirty := false;
+ }
+ }
+
+ action(m_markSendAck, "m", desc="Mark TBE that we need to ack at end") {
+ assert(is_valid(tbe));
+ tbe.SendAck := true;
+ }
+
+ action(db_markDirtyBit, "db", desc="Mark TBE dirty bit") {
+ peek(unblockNetwork_in, UnblockMsg) {
+ if (is_valid(tbe)) {
+ tbe.dirty := tbe.dirty || in_msg.Dirty;
+ }
+ }
+ }
+
+ action(dr_markDoneAckReceived, "dr", desc="Mark TBE that a done ack has been received") {
+ assert(is_valid(tbe));
+ tbe.DoneAckReceived := true;
+ tbe.DoneAckAddr := address;
+ APPEND_TRANSITION_COMMENT(" marking done ack on TBE ");
+ }
+
+ action(se_setTBE, "se", desc="Save probe info in the TBE") {
+ peek(probeNetwork_in, NBProbeRequestMsg) {
+ tbe.MsgType := in_msg.Type;
+ tbe.Requestor := in_msg.Requestor;
+ tbe.DemandAddress := in_msg.DemandAddress;
+ tbe.DemandRequest := in_msg.DemandRequest;
+ }
+ }
+
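+ // A probe can arrive while a replacement eviction (P_NP) is already in
+ // flight; sne_setNewTBE buffers the new probe's fields (P_NP -> P_NP_NP)
+ // and soe_setOldTBE promotes them once the first eviction's clean
+ // writeback completes (LastAck_CleanWb).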
+ action(sne_setNewTBE, "sne", desc="Buffer a second probe's info in the new TBE fields") {
+ peek(probeNetwork_in, NBProbeRequestMsg) {
+ tbe.NewMsgType := in_msg.Type;
+ tbe.NewRequestor := in_msg.Requestor;
+ tbe.NewDemandAddress := in_msg.DemandAddress;
+ tbe.NewDemandRequest := in_msg.DemandRequest;
+ }
+ }
+
+ action(soe_setOldTBE, "soe", desc="Activate the buffered probe info and reset ack state") {
+ tbe.MsgType := tbe.NewMsgType;
+ tbe.Requestor := tbe.NewRequestor;
+ tbe.DemandAddress := tbe.NewDemandAddress;
+ tbe.DemandRequest := tbe.NewDemandRequest;
+ tbe.OutstandingAcks := countBoolVec(tbe.ValidBlocks);
+ tbe.AllAcksReceived := true; // starts true since the region could be empty
+ tbe.DoneEvicting := false;
+ tbe.AcksReceived.clear();
+ tbe.AcksReceived.resize(blocksPerRegion);
+ tbe.SendAck := false;
+ }
+
+ action(ser_setTBE, "ser", desc="Set msg type to evict repl") {
+ tbe.MsgType := ProbeRequestType:PrbInv;
+ }
+
+ action(md_setMustDowngrade, "md", desc="When permissions finally get here, must be shared") {
+ assert(is_valid(cache_entry));
+ cache_entry.MustDowngrade := true;
+ }
+
+ action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+ TBEs.deallocate(getRegionBase(address));
+ unset_tbe();
+ }
+
+ action(p_popRequestQueue, "p", desc="Pop the request queue") {
+ requestNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pl_popUnblockQueue, "pl", desc="Pop the unblock queue") {
+ unblockNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pn_popNotifyQueue, "pn", desc="Pop the notify queue") {
+ notifyNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pp_popProbeQueue, "pp", desc="Pop the probe queue") {
+ probeNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pt_popTriggerQueue, "pt", desc="Pop the trigger queue") {
+ DPRINTF(RubySlicc, "Trigger Before Contents: %s\n", triggerQueue_in);
+ triggerQueue_in.dequeue(clockEdge());
+ DPRINTF(RubySlicc, "Trigger After Contents: %s\n", triggerQueue_in);
+ }
+
+ // Must always use wake all, since non-region addresses wait on region addresses
+ action(wa_wakeUpAllDependents, "wa", desc="Wake up any requests waiting for this region") {
+ wakeUpAllBuffers();
+ }
+
+ action(zz_stallAndWaitRequestQueue, "\z", desc="recycle request queue") {
+ Addr regAddr := getRegionBase(address);
+ DPRINTF(RubySlicc, "Stalling address %s\n", regAddr);
+ stall_and_wait(requestNetwork_in, regAddr);
+ }
+
+ action(yy_stallAndWaitProbeQueue, "\y", desc="stall probe queue") {
+ Addr regAddr := getRegionBase(address);
+ stall_and_wait(probeNetwork_in, regAddr);
+ }
+
+ action(yyy_recycleProbeQueue, "\yy", desc="recycle probe queue") {
+ probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(zzz_recycleRequestQueue, "\zz", desc="recycle request queue") {
+ requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(www_recycleUnblockNetwork, "\ww", desc="recycle unblock queue") {
+ unblockNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(z_stall, "z", desc="stall request queue") {
+ // fake state
+ }
+
+ action(mru_setMRU, "mru", desc="set MRU") {
+ cacheMemory.setMRU(address, cache_entry.NumValidBlocks);
+ }
+
+ // Transitions
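+ // Transient state names read left to right, e.g. P_NP is a private
+ // region on its way to not-present; informally, an _O suffix appears to
+ // mean "waiting for outstanding requests to drain" and _W "waiting for a
+ // writeback ack" (inferred from the transitions below).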
+
+ transition({NP_PS, S_P, S_NP_PS, P_NP, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, P_NP_W, P_NP_NP, NP_W}, {CPURead, CPUWriteback, CPUWrite}) {} {
+ zz_stallAndWaitRequestQueue;
+ }
+
+ transition(SS_P, {CPURead, CPUWriteback}) {
+ zz_stallAndWaitRequestQueue;
+ }
+
+ transition({NP, S, P, NP_PS, S_P, S_NP_PS, P_NP, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, SS_P, NP_W, P_NP_NP}, StallAccess) {} {
+ zz_stallAndWaitRequestQueue;
+ }
+
+ transition({S, P, NP_PS, S_P, S_NP_PS, P_NP, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, SS_P, P_NP_W, P_NP_NP, NP_W}, StallDoneAck) {
+ www_recycleUnblockNetwork;
+ }
+
+ transition(NP, StallDoneAck, NP_W) {
+ t_allocateTBE;
+ db_markDirtyBit;
+ dr_markDoneAckReceived;
+ pl_popUnblockQueue;
+ }
+
+ transition(NP_W, StaleRequest, NP) {
+ f_fwdReqToDir;
+ dt_deallocateTBE;
+ wa_wakeUpAllDependents;
+ p_popRequestQueue;
+ }
+
+ transition(P_NP_O, DowngradeRegion) {} {
+ z_stall; // should stall and wait
+ }
+
+ transition({NP_PS, S_NP_PS, S_P, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, SS_P}, ReplRegion) {} {
+ zz_stallAndWaitRequestQueue; // can't let things get out of order!
+ }
+
+ transition({P_NP_O, S_O, SS_P}, InvRegion) {} {
+ yyy_recycleProbeQueue; // can't be z_stall because there could be a RdBlkM in the requestQueue which has the sinked flag which is blocking the inv
+ }
+
+ transition(P_NP, {InvRegion, DowngradeRegion}, P_NP_NP) {} {
+ sne_setNewTBE;
+ pp_popProbeQueue;
+ }
+
+ transition(S_P, DowngradeRegion) {} {
+ adp_AckDemandProbe;
+ ain_ackRegionInvNow;
+ pp_popProbeQueue;
+ }
+
+ transition(P_NP_W, InvRegion) {
+ adp_AckDemandProbe;
+ ain_ackRegionInvNow;
+ pp_popProbeQueue;
+ }
+
+ transition(P_NP_W, DowngradeRegion) {
+ adp_AckDemandProbe;
+ aine_ackRegionInvExclusiveNow;
+ pp_popProbeQueue;
+ }
+
+ transition({P, S}, {CPURead, CPUWriteback}) {TagArrayRead, TagArrayWrite} {
+ mru_setMRU;
+ f_fwdReqToDir;
+ u_updateRegionEntry;
+ p_popRequestQueue;
+ }
+
+ transition(P, CPUWrite) {TagArrayRead, TagArrayWrite} {
+ mru_setMRU;
+ f_fwdReqToDir;
+ u_updateRegionEntry;
+ p_popRequestQueue;
+ }
+
+ transition(S, CPUWrite, S_O) {TagArrayRead} {
+ mru_setMRU;
+ t_allocateTBE;
+ co_checkOutstanding;
+ zz_stallAndWaitRequestQueue;
+ }
+
+ transition(S_O, AllOutstanding, SS_P) {
+ wa_wakeUpAllDependents;
+ ro_resetAllOutstanding;
+ pt_popTriggerQueue;
+ }
+
+ transition(SS_P, CPUWrite, S_P) {
+ mru_setMRU;
+ dt_deallocateTBE;
+ ru_requestUpgrade;
+ u_updateRegionEntry;
+ p_popRequestQueue;
+ }
+
+ transition(NP, {CPURead, CPUWriteback}, NP_PS) {TagArrayRead, TagArrayWrite} {
+ a_allocateRegionEntry;
+ rs_requestShared;
+ u_updateRegionEntry;
+ p_popRequestQueue;//zz_stallAndWaitRequestQueue;
+ }
+
+ transition(NP, CPUWrite, NP_PS) {TagArrayRead, TagArrayWrite} {
+ a_allocateRegionEntry;
+ rp_requestPrivate;
+ u_updateRegionEntry;
+ p_popRequestQueue;//zz_stallAndWaitRequestQueue;
+ }
+
+ transition(NP_PS, PrivateNotify, P) {} {
+ ap_ackPrivateNotify;
+ wa_wakeUpAllDependents;
+ pn_popNotifyQueue;
+ }
+
+ transition(S_P, PrivateNotify, P) {} {
+ ap_ackPrivateNotify;
+ wa_wakeUpAllDependents;
+ pn_popNotifyQueue;
+ }
+
+ transition(NP_PS, SharedNotify, S) {} {
+ ap_ackPrivateNotify;
+ wa_wakeUpAllDependents;
+ pn_popNotifyQueue;
+ }
+
+ transition(P_NP_W, WbNotify, NP) {} {
+ aw_ackWbNotify;
+ wa_wakeUpAllDependents;
+ dt_deallocateTBE;
+ pn_popNotifyQueue;
+ }
+
+ transition({P, S}, ReplRegion, P_NP_O) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ ser_setTBE;
+ d_deallocateRegionEntry;
+ co_checkOutstanding;
+ }
+
+ transition({P, S}, InvRegion, P_NP_O) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ se_setTBE;
+ m_markSendAck;
+ d_deallocateRegionEntry;
+ co_checkOutstanding;
+ pp_popProbeQueue;
+ }
+
+ transition(P_NP_O, AllOutstanding, P_NP) {} {
+ ed_evictDemand;
+ ef_enqueueFirstEvict;
+ ro_resetAllOutstanding;
+ pt_popTriggerQueue;
+ }
+
+ transition(S_P, InvRegion, S_NP_PS_O) {TagArrayRead} {
+ t_allocateTBE;
+ se_setTBE;
+ m_markSendAck;
+ so_setOutstandingCheckOne;
+ co_checkOutstanding;
+ pp_popProbeQueue;
+ }
+
+ transition(S_NP_PS_O, AllOutstanding, S_NP_PS) {
+ ed_evictDemand;
+ ef_enqueueFirstEvict;
+ ro_resetAllOutstanding;
+ pt_popTriggerQueue;
+ }
+
+ transition(P, DowngradeRegion, P_S_O) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ se_setTBE;
+ m_markSendAck;
+ co_checkOutstanding;
+ pp_popProbeQueue;
+ }
+
+ transition(P_S_O, AllOutstanding, P_S) {} {
+ ed_evictDemand;
+ ef_enqueueFirstEvict;
+ ro_resetAllOutstanding;
+ pt_popTriggerQueue;
+ }
+
+ transition({P, S}, DoneAck) {TagArrayWrite} {
+ do_decrementOutstanding;
+ wa_wakeUpAllDependents;
+ db_markDirtyBit;
+ uw_updatePossibleWriteback;
+ pl_popUnblockQueue;
+ }
+
+ transition({S_P, NP_PS, S_NP_PS}, DoneAck) {TagArrayWrite} {
+ www_recycleUnblockNetwork;
+ }
+
+ transition({P_NP_O, S_NP_PS_O, P_S_O, S_O}, DoneAck) {} {
+ do_decrementOutstanding;
+ co_checkOutstanding;
+ db_markDirtyBit;
+ uw_updatePossibleWriteback;
+ pl_popUnblockQueue;
+ }
+
+ transition({P_NP, P_S, S_NP_PS, P_NP_NP}, Evict) {} {
+ e_evictCurrent;
+ en_enqueueNextEvict;
+ pt_popTriggerQueue;
+ }
+
+ transition({P_NP, P_S, S_NP_PS, P_NP_NP}, InvAck) {} {
+ ra_receiveAck;
+ db_markDirtyBit;
+ pl_popUnblockQueue;
+ }
+
+ transition(P_NP, LastAck_CleanWb, P_NP_W) {} {
+ rw_requestWriteback;
+ pt_popTriggerQueue;
+ }
+
+ transition(P_NP_NP, LastAck_CleanWb, P_NP) {} {
+ soe_setOldTBE;
+ m_markSendAck;
+ ed_evictDemand;
+ ef_enqueueFirstEvict;
+ pt_popTriggerQueue;
+ }
+
+ transition(P_NP, LastAck_PrbResp, NP) {} {
+ aie_ackRegionExclusiveInv;
+ dt_deallocateTBE;
+ wa_wakeUpAllDependents;
+ pt_popTriggerQueue;
+ }
+
+ transition(S_NP_PS, LastAck_PrbResp, NP_PS) {} {
+ aie_ackRegionExclusiveInv;
+ dt_deallocateTBE;
+ wa_wakeUpAllDependents;
+ pt_popTriggerQueue;
+ }
+
+ transition(P_S, LastAck_PrbResp, S) {} {
+ ai_ackRegionInv;
+ ad_ackDirectory;
+ dt_deallocateTBE;
+ wa_wakeUpAllDependents;
+ pt_popTriggerQueue;
+ }
+
+}
+
diff --git a/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm b/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm
new file mode 100644
index 000000000..b392311c5
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm
@@ -0,0 +1,1187 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Jason Power
+ */
+
+machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol")
+ : CacheMemory *cacheMemory; // stores only region addresses; set its block size to match the region size below
+ NodeID cpuRegionBufferNum;
+ NodeID gpuRegionBufferNum;
+ int blocksPerRegion := 64; // 4k regions
+ Cycles toDirLatency := 10; // Latency to fwd requests and send invs to directory
+ bool always_migrate := "False";
+ bool sym_migrate := "False";
+ bool asym_migrate := "False";
+ bool noTCCdir := "False";
+ int TCC_select_num_bits := 1;
+
+ // To the directory
+ MessageBuffer * requestToDir, network="To", virtual_network="5", vnet_type="request";
+
+ // To the region buffers
+ MessageBuffer * notifyToRBuffer, network="To", virtual_network="7", vnet_type="request";
+ MessageBuffer * probeToRBuffer, network="To", virtual_network="8", vnet_type="request";
+
+ // From the region buffers
+ MessageBuffer * responseFromRBuffer, network="From", virtual_network="2", vnet_type="response";
+ MessageBuffer * requestFromRegBuf, network="From", virtual_network="0", vnet_type="request";
+
+ MessageBuffer * triggerQueue;
+{
+
+ // States
+ state_declaration(State, desc="Region states", default="RegionDir_State_NP") {
+ NP, AccessPermission:Invalid, desc="Not present in region directory";
+ P, AccessPermission:Invalid, desc="Region is private to owner";
+ S, AccessPermission:Invalid, desc="Region is shared between CPU and GPU";
+
+ P_NP, AccessPermission:Invalid, desc="Evicting the region";
+ NP_P, AccessPermission:Invalid, desc="Must wait for ack from R-buf";
+ NP_S, AccessPermission:Invalid, desc="Must wait for ack from R-buf";
+ P_P, AccessPermission:Invalid, desc="Waiting for ack from R-buf";
+ S_S, AccessPermission:Invalid, desc="Waiting for ack from R-buf";
+ P_S, AccessPermission:Invalid, desc="Downgrading the region";
+ S_P, AccessPermission:Invalid, desc="Upgrading the region";
+ P_AS, AccessPermission:Invalid, desc="Sent invalidates, waiting for acks";
+ S_AP, AccessPermission:Invalid, desc="Sent invalidates, waiting for acks";
+ P_AP, AccessPermission:Invalid, desc="Sent invalidates, waiting for acks";
+
+ SP_NP_W, AccessPermission:Invalid, desc="Last sharer writing back, waiting for ack";
+ S_W, AccessPermission:Invalid, desc="Sharer writing back, waiting for ack";
+
+ P_AP_W, AccessPermission:Invalid, desc="Fwded request to dir, waiting for ack";
+ P_AS_W, AccessPermission:Invalid, desc="Fwded request to dir, waiting for ack";
+ S_AP_W, AccessPermission:Invalid, desc="Fwded request to dir, waiting for ack";
+ }
+
+ enumeration(Event, desc="Region directory events") {
+ SendInv, desc="Send inv message to any machine that has a region buffer";
+ SendUpgrade, desc="Send upgrade message to any machine that has a region buffer";
+ SendDowngrade, desc="Send downgrade message to any machine that has a region buffer";
+
+ Evict, desc="Evict this region";
+
+ UpgradeRequest, desc="Request from r-buf for an upgrade";
+ SharedRequest, desc="Request from r-buf for read";
+ PrivateRequest, desc="Request from r-buf for write";
+
+ InvAckCore, desc="Ack from region buffer to order the invalidate";
+ InvAckCoreNoShare, desc="Ack from region buffer to order the invalidate, and it does not have the region";
+ CPUPrivateAck, desc="Ack from region buffer to order private notification";
+
+ LastAck, desc="Done evicting all the blocks";
+
+ StaleCleanWbRequest, desc="stale clean writeback request";
+ StaleCleanWbRequestNoShare, desc="stale clean wb req from a cache which should be removed from sharers";
+ CleanWbRequest, desc="clean writeback request, multiple sharers";
+ CleanWbRequest_LastSharer, desc="clean writeback request, last sharer";
+ WritebackAck, desc="Writeback Ack from region buffer";
+ DirReadyAck, desc="Directory is ready, waiting for Ack from region buffer";
+
+ TriggerInv, desc="trigger invalidate message";
+ TriggerDowngrade, desc="trigger downgrade message";
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ DataArrayRead, desc="Read the data array";
+ DataArrayWrite, desc="Write the data array";
+ TagArrayRead, desc="Read the tag array";
+ TagArrayWrite, desc="Write the tag array";
+ }
+
+ structure(BoolVec, external="yes") {
+ bool at(int);
+ void resize(int);
+ void clear();
+ }
+
+ structure(Entry, desc="Region entry", interface="AbstractCacheEntry") {
+ Addr addr, desc="Base address of this region";
+ NetDest Sharers, desc="Set of machines that are sharing, but not owners";
+ State RegionState, desc="Region state";
+ DataBlock DataBlk, desc="Data for the block (always empty in region dir)";
+ MachineID Owner, desc="Machine which owns all blocks in this region";
+ Cycles ProbeStart, desc="Time when the first probe request was issued";
+ bool LastWriten, default="false", desc="The last access to this region was a write";
+ bool LastWritenByCpu, default="false", desc="The CPU's last access to this region was a write";
+ bool LastWritenByGpu, default="false", desc="The GPU's last access to this region was a write";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ MachineID Owner, desc="Machine which owns all blocks in this region";
+ NetDest Sharers, desc="Set of machines to send evicts";
+ int NumValidBlocks, desc="Number of blocks valid so we don't have to count a BoolVec";
+ bool AllAcksReceived, desc="Got all necessary acks from dir";
+ CoherenceRequestType MsgType, desc="Msg type for the evicts could be inv or dwngrd";
+ Cycles ProbeRequestTime, default="Cycles(0)", desc="Start of probe request";
+ Cycles InitialRequestTime, default="Cycles(0)", desc="To forward back on out msg";
+ Addr DemandAddress, desc="Demand address from original request";
+ uint64_t probe_id, desc="probe id for lifetime profiling";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ // Stores only region addresses
+ TBETable TBEs, template="<RegionDir_TBE>", constructor="m_number_of_TBEs";
+ int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ void set_cache_entry(AbstractCacheEntry b);
+ void unset_cache_entry();
+ void set_tbe(TBE b);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ int blockBits, default="RubySystem::getBlockSizeBits()";
+ int blockBytes, default="RubySystem::getBlockSizeBytes()";
+ int regionBits, default="log2(m_blocksPerRegion)";
+
+ // Functions
+
+ MachineID getCoreMachine(MachineID rBuf, Addr address) {
+ if (machineIDToNodeID(rBuf) == cpuRegionBufferNum) {
+ return createMachineID(MachineType:CorePair, intToID(0));
+ } else if (machineIDToNodeID(rBuf) == gpuRegionBufferNum) {
+ if (noTCCdir) {
+ return mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits);
+ } else {
+ return createMachineID(MachineType:TCCdir, intToID(0));
+ }
+ } else {
+ error("Unexpected region buffer number");
+ }
+ }
+
+ bool isCpuMachine(MachineID rBuf) {
+ if (machineIDToNodeID(rBuf) == cpuRegionBufferNum) {
+ return true;
+ } else if (machineIDToNodeID(rBuf) == gpuRegionBufferNum) {
+ return false;
+ } else {
+ error("Unexpected region buffer number");
+ }
+ }
+
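+ // Migration policy: a shared request normally just adds a sharer, but if
+ // always_migrate is set, or the sym/asym heuristics below report that the
+ // last access to the region was a write, the region directory instead
+ // invalidates the current holders and migrates ownership (see the
+ // SendInv trigger in the request in_port).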
+ bool symMigrate(Entry cache_entry) {
+ return cache_entry.LastWriten;
+ }
+
+ bool asymMigrate(Entry cache_entry, MachineID requestor) {
+ if (isCpuMachine(requestor)) {
+ return cache_entry.LastWritenByCpu;
+ } else {
+ return cache_entry.LastWritenByGpu;
+ }
+ }
+
+ int getRegionOffset(Addr addr) {
+ if (blocksPerRegion > 1) {
+ Addr offset := bitSelect(addr, blockBits, regionBits+blockBits-1);
+ int ret := addressToInt(offset);
+ assert(ret < blocksPerRegion);
+ return ret;
+ } else {
+ return 0;
+ }
+ }
+
+ Addr getRegionBase(Addr addr) {
+ return maskLowOrderBits(addr, blockBits+regionBits);
+ }
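+ // Worked example (assuming the defaults of 64 B blocks and 64 blocks per
+ // region): blockBits = 6 and regionBits = 6, so getRegionBase masks the
+ // low 12 bits and regions are 4 KB, matching the "4k regions" note above;
+ // getRegionOffset then selects bits [6,11] as the block index.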
+
+ Addr getNextBlock(Addr addr) {
+ Addr a := addr;
+ makeNextStrideAddress(a, 1);
+ return a;
+ }
+
+ bool presentOrAvail(Addr addr) {
+ DPRINTF(RubySlicc, "Present? %s, avail? %s\n", cacheMemory.isTagPresent(getRegionBase(addr)), cacheMemory.cacheAvail(getRegionBase(addr)));
+ return cacheMemory.isTagPresent(getRegionBase(addr)) || cacheMemory.cacheAvail(getRegionBase(addr));
+ }
+
+ // Returns a region entry!
+ Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+ return static_cast(Entry, "pointer", cacheMemory.lookup(getRegionBase(addr)));
+ }
+
+ TBE getTBE(Addr addr), return_by_pointer="yes" {
+ return TBEs.lookup(getRegionBase(addr));
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ return getCacheEntry(getRegionBase(addr)).DataBlk;
+ }
+
+ State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ if (is_valid(tbe)) {
+ return tbe.TBEState;
+ } else if (is_valid(cache_entry)) {
+ return cache_entry.RegionState;
+ }
+ return State:NP;
+ }
+
+ void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ if (is_valid(tbe)) {
+ tbe.TBEState := state;
+ }
+ if (is_valid(cache_entry)) {
+ cache_entry.RegionState := state;
+ }
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ TBE tbe := getTBE(addr);
+ if(is_valid(tbe)) {
+ return RegionDir_State_to_permission(tbe.TBEState);
+ }
+ Entry cache_entry := getCacheEntry(addr);
+ if(is_valid(cache_entry)) {
+ return RegionDir_State_to_permission(cache_entry.RegionState);
+ }
+ return AccessPermission:NotPresent;
+ }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+ if (is_valid(cache_entry)) {
+ cache_entry.changePermission(RegionDir_State_to_permission(state));
+ }
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ functionalMemoryRead(pkt);
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ if (functionalMemoryWrite(pkt)) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ cacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ cacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ cacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ cacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:DataArrayRead) {
+ return cacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:DataArrayWrite) {
+ return cacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:TagArrayRead) {
+ return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:TagArrayWrite) {
+ return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+
+ out_port(requestNetwork_out, CPURequestMsg, requestToDir);
+ out_port(notifyNetwork_out, CPURequestMsg, notifyToRBuffer);
+ out_port(probeNetwork_out, NBProbeRequestMsg, probeToRBuffer);
+
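+ // Three in_ports, ranked so that (if rank ordering is honored) internal
+ // triggers are consumed before region-buffer responses, which in turn
+ // take priority over new requests; this keeps ack bookkeeping ahead of
+ // fresh traffic.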
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=2) {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ assert(in_msg.addr == getRegionBase(in_msg.addr));
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ TBE tbe := getTBE(in_msg.addr);
+ DPRINTF(RubySlicc, "trigger msg: %s (%s)\n", in_msg, getRegionBase(in_msg.addr));
+ if (in_msg.Type == TriggerType:AcksComplete) {
+ assert(is_valid(tbe));
+ trigger(Event:LastAck, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == TriggerType:InvRegion) {
+ assert(is_valid(tbe));
+ trigger(Event:TriggerInv, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == TriggerType:DowngradeRegion) {
+ assert(is_valid(tbe));
+ trigger(Event:TriggerDowngrade, in_msg.addr, cache_entry, tbe);
+ } else {
+ error("Unknown trigger message");
+ }
+ }
+ }
+ }
+
+ in_port(responseNetwork_in, ResponseMsg, responseFromRBuffer, rank=1) {
+ if (responseNetwork_in.isReady(clockEdge())) {
+ peek(responseNetwork_in, ResponseMsg) {
+ TBE tbe := getTBE(in_msg.addr);
+ Entry cache_entry := getCacheEntry(in_msg.addr);
+ if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+ assert(in_msg.addr == getRegionBase(in_msg.addr));
+ assert(is_valid(tbe));
+ if (in_msg.NotCached) {
+ trigger(Event:InvAckCoreNoShare, in_msg.addr, cache_entry, tbe);
+ } else {
+ trigger(Event:InvAckCore, in_msg.addr, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceResponseType:PrivateAck) {
+ assert(in_msg.addr == getRegionBase(in_msg.addr));
+ assert(is_valid(cache_entry));
+ //Fix Me...add back in: assert(cache_entry.Sharers.isElement(in_msg.Sender));
+ trigger(Event:CPUPrivateAck, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceResponseType:RegionWbAck) {
+ //Fix Me...add back in: assert(cache_entry.Sharers.isElement(in_msg.Sender) == false);
+ assert(in_msg.addr == getRegionBase(in_msg.addr));
+ trigger(Event:WritebackAck, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceResponseType:DirReadyAck) {
+ assert(is_valid(tbe));
+ trigger(Event:DirReadyAck, getRegionBase(in_msg.addr), cache_entry, tbe);
+ } else {
+ error("Invalid response type");
+ }
+ }
+ }
+ }
+
+ // In from cores
+ // NOTE: We get the cache / TBE entry based on the region address,
+ // but pass the block address to the actions
+ in_port(requestNetwork_in, CPURequestMsg, requestFromRegBuf, rank=0) {
+ if (requestNetwork_in.isReady(clockEdge())) {
+ peek(requestNetwork_in, CPURequestMsg) {
+ //assert(in_msg.addr == getRegionBase(in_msg.addr));
+ Addr address := getRegionBase(in_msg.addr);
+ DPRINTF(RubySlicc, "Got %s, base %s\n", in_msg.addr, address);
+ if (presentOrAvail(address)) {
+ TBE tbe := getTBE(address);
+ Entry cache_entry := getCacheEntry(address);
+ if (in_msg.Type == CoherenceRequestType:PrivateRequest) {
+ if (is_valid(cache_entry) && (cache_entry.Owner != in_msg.Requestor ||
+ getState(tbe, cache_entry, address) == State:S)) {
+ trigger(Event:SendInv, address, cache_entry, tbe);
+ } else {
+ trigger(Event:PrivateRequest, address, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceRequestType:SharedRequest) {
+ if (is_invalid(cache_entry)) {
+ // If no one has ever requested this region give private permissions
+ trigger(Event:PrivateRequest, address, cache_entry, tbe);
+ } else {
+ if (always_migrate ||
+ (sym_migrate && symMigrate(cache_entry)) ||
+ (asym_migrate && asymMigrate(cache_entry, in_msg.Requestor))) {
+ if (cache_entry.Sharers.count() == 1 &&
+ cache_entry.Sharers.isElement(in_msg.Requestor)) {
+ trigger(Event:UpgradeRequest, address, cache_entry, tbe);
+ } else {
+ trigger(Event:SendInv, address, cache_entry, tbe);
+ }
+ } else { // don't migrate
+ if(cache_entry.Sharers.isElement(in_msg.Requestor) ||
+ getState(tbe, cache_entry, address) == State:S) {
+ trigger(Event:SharedRequest, address, cache_entry, tbe);
+ } else {
+ trigger(Event:SendDowngrade, address, cache_entry, tbe);
+ }
+ }
+ }
+ } else if (in_msg.Type == CoherenceRequestType:UpgradeRequest) {
+ if (is_invalid(cache_entry)) {
+ trigger(Event:PrivateRequest, address, cache_entry, tbe);
+ } else if (cache_entry.Sharers.count() == 1 && cache_entry.Sharers.isElement(in_msg.Requestor)) {
+ trigger(Event:UpgradeRequest, address, cache_entry, tbe);
+ } else {
+ trigger(Event:SendUpgrade, address, cache_entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceRequestType:CleanWbRequest) {
+ if (is_invalid(cache_entry) || cache_entry.Sharers.isElement(in_msg.Requestor) == false) {
+ trigger(Event:StaleCleanWbRequest, address, cache_entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "wb address %s(%s) owner %s sharers %s requestor %s %d %d\n", in_msg.addr, getRegionBase(in_msg.addr), cache_entry.Owner, cache_entry.Sharers, in_msg.Requestor, cache_entry.Sharers.isElement(in_msg.Requestor), cache_entry.Sharers.count());
+ if (cache_entry.Sharers.isElement(in_msg.Requestor) && cache_entry.Sharers.count() == 1) {
+ DPRINTF(RubySlicc, "last wb\n");
+ trigger(Event:CleanWbRequest_LastSharer, address, cache_entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "clean wb\n");
+ trigger(Event:CleanWbRequest, address, cache_entry, tbe);
+ }
+ }
+ } else {
+ error("unknown region dir request type");
+ }
+ } else {
+ Addr victim := cacheMemory.cacheProbe(getRegionBase(in_msg.addr));
+ TBE victim_tbe := getTBE(victim);
+ Entry victim_entry := getCacheEntry(victim);
+ DPRINTF(RubySlicc, "Evicting address %s for new region at address %s(%s)\n", victim, in_msg.addr, getRegionBase(in_msg.addr));
+ assert(is_valid(victim_entry));
+ trigger(Event:Evict, victim, victim_entry, victim_tbe);
+ }
+ }
+ }
+ }
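+ // Summary of the request dispatch above: private requests invalidate
+ // other holders unless the requestor already owns the region; shared
+ // requests either add a sharer, downgrade the owner, or migrate
+ // ownership per the migration knobs; clean writebacks from non-sharers
+ // are treated as stale; a full region cache forces an Evict first.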
+
+ // Actions
+
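+ // The four f_fwdReqToDir* variants below differ only in whether the
+ // directory must send a DirReadyAck back here (NoAckNeeded=false in the
+ // *WithAck forms) and whether the response is forced shared (ForceShared
+ // in the *Shared forms); Acks carries the number of sharer probe acks
+ // the requestor should expect.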
+ action(f_fwdReqToDir, "f", desc="Forward CPU request to directory") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) {
+ out_msg.addr := in_msg.addr; // This is the block address. "address" is the region address
+ out_msg.Type := in_msg.OriginalType;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Dirty := in_msg.Dirty;
+ out_msg.Requestor := getCoreMachine(in_msg.Requestor,address);
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.Destination.add(map_Address_to_Directory(in_msg.addr));
+ out_msg.Shared := in_msg.Shared;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.Private := in_msg.Private;
+ out_msg.NoAckNeeded := true;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ProbeRequestStartTime := curCycle();
+ out_msg.DemandRequest := true;
+ if (is_valid(cache_entry) && getState(tbe, cache_entry, address) != State:S) {
+ out_msg.Acks := cache_entry.Sharers.count();
+ } else {
+ out_msg.Acks := 0;
+ }
+ }
+ }
+ }
+
+ action(f_fwdReqToDirShared, "fs", desc="Forward CPU request to directory (shared)") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) {
+ out_msg.addr := in_msg.addr; // This is the block address. "address" is the region address
+ out_msg.Type := in_msg.OriginalType;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Dirty := in_msg.Dirty;
+ out_msg.Requestor := getCoreMachine(in_msg.Requestor,address);
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.Destination.add(map_Address_to_Directory(in_msg.addr));
+ out_msg.Shared := in_msg.Shared;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.Private := in_msg.Private;
+ out_msg.NoAckNeeded := true;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ProbeRequestStartTime := curCycle();
+ out_msg.DemandRequest := true;
+ out_msg.ForceShared := true;
+ if (is_valid(cache_entry) && getState(tbe, cache_entry, address) != State:S) {
+ out_msg.Acks := cache_entry.Sharers.count();
+ } else {
+ out_msg.Acks := 0;
+ }
+ }
+ }
+ }
+
+ action(f_fwdReqToDirWithAck, "fa", desc="Forward CPU request to directory with ack request") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) {
+ out_msg.addr := in_msg.addr; // This is the block address. "address" is the region address
+ out_msg.Type := in_msg.OriginalType;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Dirty := in_msg.Dirty;
+ out_msg.Requestor := getCoreMachine(in_msg.Requestor,address);
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.Destination.add(map_Address_to_Directory(in_msg.addr));
+ out_msg.Shared := in_msg.Shared;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.Private := in_msg.Private;
+ out_msg.NoAckNeeded := false;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ProbeRequestStartTime := curCycle();
+ out_msg.DemandRequest := true;
+ if (is_valid(cache_entry)) {
+ out_msg.Acks := cache_entry.Sharers.count();
+ // Don't need an ack from the requestor!
+ if (cache_entry.Sharers.isElement(in_msg.Requestor)) {
+ out_msg.Acks := out_msg.Acks - 1;
+ }
+ } else {
+ out_msg.Acks := 0;
+ }
+ }
+ }
+ }
+
+ action(f_fwdReqToDirWithAckShared, "fas", desc="Forward CPU request to directory with ack request") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) {
+ out_msg.addr := in_msg.addr; // This is the block address. "address" is the region address
+ out_msg.Type := in_msg.OriginalType;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Dirty := in_msg.Dirty;
+ out_msg.Requestor := getCoreMachine(in_msg.Requestor,address);
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.Destination.add(map_Address_to_Directory(in_msg.addr));
+ out_msg.Shared := in_msg.Shared;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.Private := in_msg.Private;
+ out_msg.NoAckNeeded := false;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ProbeRequestStartTime := curCycle();
+ out_msg.DemandRequest := true;
+ out_msg.ForceShared := true;
+ if (is_valid(cache_entry)) {
+ out_msg.Acks := cache_entry.Sharers.count();
+ // Don't need an ack from the requestor!
+ if (cache_entry.Sharers.isElement(in_msg.Requestor)) {
+ out_msg.Acks := out_msg.Acks - 1;
+ }
+ } else {
+ out_msg.Acks := 0;
+ }
+ }
+ }
+ }
+
+ action(a_allocateRegionEntry, "a", desc="Allocate a new entry") {
+ set_cache_entry(cacheMemory.allocate(getRegionBase(address), new Entry));
+ peek(requestNetwork_in, CPURequestMsg) {
+ APPEND_TRANSITION_COMMENT(in_msg.Requestor);
+ }
+ }
+
+ action(d_deallocateRegionEntry, "d", desc="Deallocate region entry") {
+ cacheMemory.deallocate(getRegionBase(address));
+ unset_cache_entry();
+ }
+
+ action(ra_receiveAck, "ra", desc="Mark TBE entry as received this ack") {
+ //assert(tbe.ValidBlocks.at(getRegionOffset(address)));
+ DPRINTF(RubySlicc, "received ack for %s reg: %s\n", address, getRegionBase(address));
+ tbe.NumValidBlocks := tbe.NumValidBlocks - 1;
+ assert(tbe.NumValidBlocks >= 0);
+ if (tbe.NumValidBlocks == 0) {
+ tbe.AllAcksReceived := true;
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.Type := TriggerType:AcksComplete;
+ out_msg.addr := address;
+ }
+ }
+ APPEND_TRANSITION_COMMENT(getRegionBase(address));
+ APPEND_TRANSITION_COMMENT(" Acks left receive ");
+ APPEND_TRANSITION_COMMENT(tbe.NumValidBlocks);
+ }
+
+ action(ca_checkAcks, "ca", desc="Check to see if we need more acks") {
+ if (tbe.NumValidBlocks == 0) {
+ tbe.AllAcksReceived := true;
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.Type := TriggerType:AcksComplete;
+ out_msg.addr := address;
+ }
+ }
+ }
+
+ action(ti_triggerInv, "ti", desc="Enqueue a trigger to invalidate this region") {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.Type := TriggerType:InvRegion;
+ out_msg.addr := address;
+ }
+ }
+
+ action(td_triggerDowngrade, "td", desc="Enqueue a trigger to downgrade this region") {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.Type := TriggerType:DowngradeRegion;
+ out_msg.addr := address;
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ TBEs.allocate(getRegionBase(address));
+ set_tbe(getTBE(address));
+ if (is_valid(cache_entry)) {
+ tbe.Owner := cache_entry.Owner;
+ tbe.Sharers := cache_entry.Sharers;
+ tbe.AllAcksReceived := true; // assume no acks are required
+ }
+ tbe.ProbeRequestTime := curCycle();
+ peek(requestNetwork_in, CPURequestMsg) {
+ tbe.InitialRequestTime := in_msg.InitialRequestTime;
+ tbe.DemandAddress := in_msg.addr;
+ }
+ APPEND_TRANSITION_COMMENT(getRegionBase(address));
+ APPEND_TRANSITION_COMMENT(" Acks left ");
+ APPEND_TRANSITION_COMMENT(tbe.NumValidBlocks);
+ APPEND_TRANSITION_COMMENT(" Owner, ");
+ APPEND_TRANSITION_COMMENT(tbe.Owner);
+ APPEND_TRANSITION_COMMENT(" sharers, ");
+ APPEND_TRANSITION_COMMENT(tbe.Sharers);
+ }
+
+ action(ss_setSharers, "ss", desc="Add requestor to sharers") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ cache_entry.Sharers.add(in_msg.Requestor);
+ APPEND_TRANSITION_COMMENT(cache_entry.Sharers);
+ }
+ }
+
+ action(rs_removeSharer, "rs", desc="Remove requestor from sharers") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ cache_entry.Sharers.remove(in_msg.Requestor);
+ APPEND_TRANSITION_COMMENT(" removing ");
+ APPEND_TRANSITION_COMMENT(in_msg.Requestor);
+ APPEND_TRANSITION_COMMENT(" sharers ");
+ APPEND_TRANSITION_COMMENT(cache_entry.Sharers);
+ }
+ }
+
+ action(rsr_removeSharerResponse, "rsr", desc="Remove sender from sharers") {
+ peek(responseNetwork_in, ResponseMsg) {
+ cache_entry.Sharers.remove(in_msg.Sender);
+ APPEND_TRANSITION_COMMENT(cache_entry.Sharers);
+ }
+ }
+
+ action(cs_clearSharers, "cs", desc="Clear the sharers list") {
+ cache_entry.Sharers.clear();
+ }
+
+ action(so_setOwner, "so", desc="Set the owner to the requestor") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ cache_entry.Owner := in_msg.Requestor;
+ APPEND_TRANSITION_COMMENT(" Owner now: ");
+ APPEND_TRANSITION_COMMENT(cache_entry.Owner);
+ }
+ }
+
+ action(rr_removeRequestorFromTBE, "rr", desc="Remove requestor from TBE sharers") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ tbe.Sharers.remove(in_msg.Requestor);
+ }
+ }
+
+ action(ur_updateDirtyStatusOnRequest, "ur", desc="Update dirty status on demand request") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ if (is_valid(cache_entry)) {
+ if ((in_msg.Type == CoherenceRequestType:SharedRequest) &&
+ (cache_entry.Sharers.isElement(in_msg.Requestor) == false)) {
+ cache_entry.LastWriten := false;
+ if (isCpuMachine(in_msg.Requestor)) {
+ cache_entry.LastWritenByCpu := false;
+ } else {
+ cache_entry.LastWritenByGpu := false;
+ }
+ } else if ((in_msg.Type == CoherenceRequestType:PrivateRequest) ||
+ (in_msg.Type == CoherenceRequestType:UpgradeRequest)) {
+ cache_entry.LastWriten := true;
+ if (isCpuMachine(in_msg.Requestor)) {
+ cache_entry.LastWritenByCpu := true;
+ } else {
+ cache_entry.LastWritenByGpu := true;
+ }
+ }
+ }
+ }
+ }
+
+ action(ud_updateDirtyStatusWithWb, "ud", desc="Update dirty status on writeback") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ if (is_valid(cache_entry) && in_msg.Dirty) {
+ cache_entry.LastWriten := true;
+ if (isCpuMachine(in_msg.Requestor)) {
+ cache_entry.LastWritenByCpu := true;
+ } else {
+ cache_entry.LastWritenByGpu := true;
+ }
+ }
+ }
+ }
+
+ action(sns_setNumAcksSharers, "sns", desc="Set number of acks to one per shared region buffer") {
+ assert(is_valid(tbe));
+ assert(is_valid(cache_entry));
+ tbe.NumValidBlocks := tbe.Sharers.count();
+ }
+
+ action(sno_setNumAcksOne, "sno", desc="Set number of acks to one") {
+ assert(is_valid(tbe));
+ assert(is_valid(cache_entry));
+ tbe.NumValidBlocks := 1;
+ }
+
+ action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+ TBEs.deallocate(getRegionBase(address));
+ APPEND_TRANSITION_COMMENT(" reg: ");
+ APPEND_TRANSITION_COMMENT(getRegionBase(address));
+ unset_tbe();
+ }
+
+ action(wb_sendWbNotice, "wb", desc="Send notice to cache that writeback is acknowledged") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceRequestType:WbNotify;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.Requestor := machineID;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ }
+ }
+ }
+
+ action(wbn_sendWbNoticeNoAck, "wbn", desc="Send notice to cache that writeback is acknowledged (no ack needed)") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceRequestType:WbNotify;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.Requestor := machineID;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.NoAckNeeded := true;
+ }
+ }
+ }
+
+ action(b_sendPrivateNotice, "b", desc="Send notice to private cache that it has private access") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceRequestType:PrivateNotify;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.Requestor := machineID;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ }
+ }
+ }
+
+  action(bs_sendSharedNotice, "bs", desc="Send notice to private cache that it has shared access") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceRequestType:SharedNotify;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.Requestor := machineID;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ }
+ }
+ }
+
+ action(c_sendSharedNoticeToOrigReq, "c", desc="Send notice to private cache that it has shared access") {
+ assert(is_valid(tbe));
+ enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceRequestType:SharedNotify;
+ out_msg.Destination.add(tbe.Owner);
+ out_msg.Requestor := machineID;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestTime;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ APPEND_TRANSITION_COMMENT("dest: ");
+ APPEND_TRANSITION_COMMENT(out_msg.Destination);
+ }
+ }
+
+ action(sp_sendPrivateNoticeToOrigReq, "sp", desc="Send notice to private cache that it has private access") {
+ assert(is_valid(tbe));
+ enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+ out_msg.addr := getRegionBase(address);
+ out_msg.Type := CoherenceRequestType:PrivateNotify;
+ out_msg.Destination.add(tbe.Owner);
+ out_msg.Requestor := machineID;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestTime;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ APPEND_TRANSITION_COMMENT("dest: ");
+ APPEND_TRANSITION_COMMENT(out_msg.Destination);
+ }
+ }
+
+ action(i_RegionInvNotify, "i", desc="Send notice to private cache that it no longer has private access") {
+ enqueue(probeNetwork_out, NBProbeRequestMsg, 1) {
+ out_msg.addr := address;
+ out_msg.DemandAddress := tbe.DemandAddress;
+ //out_msg.Requestor := tbe.Requestor;
+ out_msg.Requestor := machineID;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ //Fix me: assert(tbe.Sharers.count() > 0);
+ out_msg.DemandRequest := true;
+ out_msg.Destination := tbe.Sharers;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ APPEND_TRANSITION_COMMENT("dest: ");
+ APPEND_TRANSITION_COMMENT(out_msg.Destination);
+ }
+ }
+
+ action(i0_RegionInvNotifyDemand0, "i0", desc="Send notice to private cache that it no longer has private access") {
+ enqueue(probeNetwork_out, NBProbeRequestMsg, 1) {
+ out_msg.addr := address;
+ // Demand address should default to 0 -> out_msg.DemandAddress := 0;
+ out_msg.Requestor := machineID;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.Destination := tbe.Sharers;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ APPEND_TRANSITION_COMMENT("dest: ");
+ APPEND_TRANSITION_COMMENT(out_msg.Destination);
+ }
+ }
+
+ action(rd_RegionDowngrade, "rd", desc="Send notice to private cache that it only has shared access") {
+ enqueue(probeNetwork_out, NBProbeRequestMsg, 1) {
+ out_msg.addr := address;
+ out_msg.DemandAddress := tbe.DemandAddress;
+ out_msg.Requestor := machineID;
+ out_msg.Type := ProbeRequestType:PrbDowngrade;
+ out_msg.DemandRequest := true;
+ out_msg.Destination := tbe.Sharers;
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ APPEND_TRANSITION_COMMENT("dest: ");
+ APPEND_TRANSITION_COMMENT(out_msg.Destination);
+ }
+ }
+
+ action(p_popRequestQueue, "p", desc="Pop the request queue") {
+ requestNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pt_popTriggerQueue, "pt", desc="Pop the trigger queue") {
+ triggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="Pop the response queue") {
+ responseNetwork_in.dequeue(clockEdge());
+ }
+
+ action(s_stallAndWaitRequest, "s", desc="Stall and wait on the region address") {
+ Addr regAddr := getRegionBase(address);
+ stall_and_wait(requestNetwork_in, regAddr);
+ }
+
+ action(w_wakeUpRegionDependents, "w", desc="Wake up any requests waiting for this region") {
+ wakeUpBuffers(getRegionBase(address));
+ }
+
+  action(wa_wakeUpAllDependents, "wa", desc="Wake up all waiting requests") {
+ wakeUpAllBuffers();
+ }
+
+  action(zz_recycleRequestQueue, "\z", desc="recycle request queue") {
+ requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(z_stall, "z", desc="stall request queue") {
+    // do nothing; the message stays at the head of the queue
+ }
+
+ action(mru_setMRU, "mru", desc="set MRU") {
+ cacheMemory.setMRU(address);
+ }
+
+  // Transitions
+
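+  // Note on transient-state naming (inferred from the transitions below):
+  // X_Y reads as "moving from X to Y", e.g. NP_P is NP -> P; a _W suffix
+  // means the buffer is still waiting on a directory ack (DirReadyAck or
+  // WritebackAck), and an A (as in P_AP) means probe acks are outstanding.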
+ transition({NP_P, P_P, NP_S, S_S, S_P, P_S, P_NP, S_AP, P_AS, P_AP, SP_NP_W, S_W, P_AP_W, P_AS_W, S_AP_W}, {PrivateRequest, SharedRequest, UpgradeRequest, SendInv, SendUpgrade, SendDowngrade, CleanWbRequest, CleanWbRequest_LastSharer, StaleCleanWbRequest}) {
+ s_stallAndWaitRequest
+ }
+
+ transition({NP_P, P_P, NP_S, S_S, S_P, S_W, P_S, P_NP, S_AP, P_AS, P_AP, P_AP_W, P_AS_W, S_AP_W}, Evict) {
+ zz_recycleRequestQueue;
+ }
+
+ transition(NP, {PrivateRequest, SendUpgrade}, NP_P) {TagArrayRead, TagArrayWrite} {
+ a_allocateRegionEntry;
+ ur_updateDirtyStatusOnRequest;
+ f_fwdReqToDir;
+ b_sendPrivateNotice;
+ so_setOwner;
+ ss_setSharers;
+ t_allocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition(P, {PrivateRequest, UpgradeRequest}, P_P) {TagArrayRead} {
+ mru_setMRU;
+ ur_updateDirtyStatusOnRequest;
+ f_fwdReqToDir;
+ b_sendPrivateNotice;
+ t_allocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition({NP_P, P_P}, CPUPrivateAck, P) {
+ dt_deallocateTBE;
+ w_wakeUpRegionDependents;
+ pr_popResponseQueue;
+ }
+
+ transition({NP, P, S}, StaleCleanWbRequest) {TagArrayRead, TagArrayWrite} {
+ wbn_sendWbNoticeNoAck;
+ ud_updateDirtyStatusWithWb;
+ p_popRequestQueue;
+ }
+
+ transition(NP, SharedRequest, NP_S) {TagArrayRead, TagArrayWrite} {
+ a_allocateRegionEntry;
+ ur_updateDirtyStatusOnRequest;
+ f_fwdReqToDirShared;
+ bs_sendSharedNotice;
+ so_setOwner;
+ ss_setSharers;
+ t_allocateTBE;
+ p_popRequestQueue;
+ }
+
+ // Could probably do this in parallel with other shared requests
+ transition(S, SharedRequest, S_S) {TagArrayRead, TagArrayWrite} {
+ mru_setMRU;
+ ur_updateDirtyStatusOnRequest;
+ f_fwdReqToDirShared;
+ bs_sendSharedNotice;
+ ss_setSharers;
+ t_allocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition({P, S}, CleanWbRequest_LastSharer, SP_NP_W) {TagArrayRead, TagArrayWrite} {
+ ud_updateDirtyStatusWithWb;
+ wb_sendWbNotice;
+ rs_removeSharer;
+ t_allocateTBE;
+ d_deallocateRegionEntry;
+ p_popRequestQueue;
+ }
+
+ transition(S, CleanWbRequest, S_W) {TagArrayRead, TagArrayWrite} {
+ ud_updateDirtyStatusWithWb;
+ wb_sendWbNotice;
+ rs_removeSharer;
+ t_allocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition(SP_NP_W, WritebackAck, NP) {
+ dt_deallocateTBE;
+ w_wakeUpRegionDependents;
+ pr_popResponseQueue;
+ }
+
+ transition(S_W, WritebackAck, S) {
+ dt_deallocateTBE;
+ w_wakeUpRegionDependents;
+ pr_popResponseQueue;
+ }
+
+ transition({NP_S, S_S}, CPUPrivateAck, S) {
+ dt_deallocateTBE;
+ w_wakeUpRegionDependents;
+ pr_popResponseQueue;
+ }
+
+ transition(S, UpgradeRequest, S_P) {TagArrayRead, TagArrayWrite} {
+ mru_setMRU;
+ ur_updateDirtyStatusOnRequest;
+ f_fwdReqToDir;
+ b_sendPrivateNotice;
+ so_setOwner;
+ t_allocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition(S_P, CPUPrivateAck, P) {
+ dt_deallocateTBE;
+ w_wakeUpRegionDependents;
+ pr_popResponseQueue;
+ }
+
+ transition(P, SendInv, P_AP_W) {TagArrayRead, TagArrayWrite} {
+ mru_setMRU;
+ ur_updateDirtyStatusOnRequest;
+ f_fwdReqToDirWithAck;
+ so_setOwner;
+ t_allocateTBE;
+ rr_removeRequestorFromTBE;
+ sns_setNumAcksSharers;
+ cs_clearSharers;
+ ss_setSharers;
+ //i_RegionInvNotify;
+ p_popRequestQueue;
+ }
+
+ transition({P_AP_W, S_AP_W}, DirReadyAck) {
+ ti_triggerInv;
+ pr_popResponseQueue;
+ }
+
+ transition(P_AS_W, DirReadyAck) {
+ td_triggerDowngrade;
+ pr_popResponseQueue;
+ }
+
+ transition(P_AS_W, TriggerDowngrade, P_AS) {
+ rd_RegionDowngrade;
+ pt_popTriggerQueue;
+ }
+
+ transition(P_AP_W, TriggerInv, P_AP) {
+ i_RegionInvNotify;
+ pt_popTriggerQueue;
+ }
+
+ transition(S_AP_W, TriggerInv, S_AP) {
+ i_RegionInvNotify;
+ pt_popTriggerQueue;
+ }
+
+ transition(P, SendUpgrade, P_AP_W) {TagArrayRead, TagArrayWrite} {
+ mru_setMRU;
+ ur_updateDirtyStatusOnRequest;
+ f_fwdReqToDirWithAck;
+ so_setOwner;
+ t_allocateTBE;
+ rr_removeRequestorFromTBE;
+ sns_setNumAcksSharers;
+ cs_clearSharers;
+ ss_setSharers;
+ p_popRequestQueue;
+ }
+
+ transition(P, Evict, P_NP) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ sns_setNumAcksSharers;
+ i0_RegionInvNotifyDemand0;
+ d_deallocateRegionEntry;
+ }
+
+ transition(S, SendInv, P_AP_W) {TagArrayRead, TagArrayWrite} {
+ mru_setMRU;
+ ur_updateDirtyStatusOnRequest;
+ f_fwdReqToDirWithAck;
+ so_setOwner;
+ t_allocateTBE;
+ rr_removeRequestorFromTBE;
+ sns_setNumAcksSharers;
+ cs_clearSharers;
+ ss_setSharers;
+ p_popRequestQueue;
+ }
+
+ transition(S, Evict, P_NP) {TagArrayRead, TagArrayWrite} {
+ t_allocateTBE;
+ sns_setNumAcksSharers;
+ i0_RegionInvNotifyDemand0;
+ d_deallocateRegionEntry;
+ }
+
+ transition(P_NP, LastAck, NP) {
+ dt_deallocateTBE;
+ wa_wakeUpAllDependents;
+ pt_popTriggerQueue;
+ }
+
+ transition(S, SendUpgrade, S_AP_W) {TagArrayRead, TagArrayWrite} {
+ mru_setMRU;
+ ur_updateDirtyStatusOnRequest;
+ f_fwdReqToDirWithAck;
+ so_setOwner;
+ t_allocateTBE;
+ rr_removeRequestorFromTBE;
+ sns_setNumAcksSharers;
+ cs_clearSharers;
+ ss_setSharers;
+ p_popRequestQueue;
+ }
+
+ transition(S_AP, LastAck, S_P) {
+ sp_sendPrivateNoticeToOrigReq;
+ pt_popTriggerQueue;
+ }
+
+ transition(P_AP, LastAck, P_P) {
+ sp_sendPrivateNoticeToOrigReq;
+ pt_popTriggerQueue;
+ }
+
+ transition(P, SendDowngrade, P_AS_W) {TagArrayRead, TagArrayWrite} {
+ mru_setMRU;
+ ur_updateDirtyStatusOnRequest;
+ f_fwdReqToDirWithAckShared;
+ so_setOwner;
+ t_allocateTBE;
+ sns_setNumAcksSharers;
+ ss_setSharers; //why do we set the sharers before sending the downgrade? Are we sending a downgrade to the requestor?
+ p_popRequestQueue;
+ }
+
+ transition(P_AS, LastAck, P_S) {
+ c_sendSharedNoticeToOrigReq;
+ pt_popTriggerQueue;
+ }
+
+ transition(P_S, CPUPrivateAck, S) {
+ dt_deallocateTBE;
+ w_wakeUpRegionDependents;
+ pr_popResponseQueue;
+ }
+
+ transition({P_NP, P_AS, S_AP, P_AP}, InvAckCore) {} {
+ ra_receiveAck;
+ pr_popResponseQueue;
+ }
+
+ transition({P_NP, S_AP, P_AP}, InvAckCoreNoShare) {} {
+ ra_receiveAck;
+ pr_popResponseQueue;
+ }
+
+ transition(P_AS, InvAckCoreNoShare) {} {
+ ra_receiveAck;
+ rsr_removeSharerResponse;
+ pr_popResponseQueue;
+ }
+
+}
+
+
diff --git a/src/mem/protocol/MOESI_AMD_Base-dir.sm b/src/mem/protocol/MOESI_AMD_Base-dir.sm
new file mode 100644
index 000000000..52cefda66
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-dir.sm
@@ -0,0 +1,1137 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:Directory, "AMD Baseline protocol")
+: DirectoryMemory * directory;
+ CacheMemory * L3CacheMemory;
+ Cycles response_latency := 5;
+ Cycles l3_hit_latency := 50;
+ bool noTCCdir := "False";
+ bool CPUonly := "False";
+ int TCC_select_num_bits;
+ bool useL3OnWT := "False";
+ Cycles to_memory_controller_latency := 1;
+
+ // From the Cores
+ MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseFromCores, network="From", virtual_network="2", vnet_type="response";
+ MessageBuffer * unblockFromCores, network="From", virtual_network="4", vnet_type="unblock";
+
+ MessageBuffer * probeToCore, network="To", virtual_network="0", vnet_type="request";
+ MessageBuffer * responseToCore, network="To", virtual_network="2", vnet_type="response";
+
+ MessageBuffer * triggerQueue;
+ MessageBuffer * L3triggerQueue;
+ MessageBuffer * responseFromMemory;
+{
+ // STATES
+ state_declaration(State, desc="Directory states", default="Directory_State_U") {
+ U, AccessPermission:Backing_Store, desc="unblocked";
+ BL, AccessPermission:Busy, desc="got L3 WB request";
+    // BL is Busy because it's possible for the data to exist only in the
+    // network, in the WB; the L3 has sent it and moved on, possibly now in
+    // state I.
+ BS_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
+ BM_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
+ B_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
+ BP, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory";
+ BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
+ BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
+ B_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
+ BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ B, AccessPermission:Backing_Store, desc="sent response, Blocked til ack";
+ }
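+  // Naming note (inferred from the desc strings above): the prefix encodes
+  // the eventual response (BS -> Shared, BM -> Modified, B_ -> Exclusive or
+  // Shared); the suffix encodes what is outstanding: PM = probes and
+  // memory, Pm = probes only (memory data already in the TBE), M = memory
+  // only (probes complete).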
+
+ // Events
+ enumeration(Event, desc="Directory events") {
+ // CPU requests
+    RdBlkS, desc="Read Blk Shared";
+    RdBlkM, desc="Read Blk Modified";
+    RdBlk, desc="Read Blk";
+    CtoD, desc="Change To Dirty";
+ WriteThrough, desc="WriteThrough Message";
+ Atomic, desc="Atomic Message";
+
+ // writebacks
+    VicDirty, desc="L2 dirty eviction";
+    VicClean, desc="L2 clean eviction";
+ CPUData, desc="WB data from CPU";
+    StaleWB, desc="Notification that WB has been superseded by a probe";
+
+ // probe responses
+ CPUPrbResp, desc="Probe Response Msg";
+
+ ProbeAcksComplete, desc="Probe Acks Complete";
+
+ L3Hit, desc="Hit in L3 return data to core";
+
+ // Memory Controller
+ MemData, desc="Fetched data from memory arrives";
+ WBAck, desc="Writeback Ack from memory arrives";
+
+ CoreUnblock, desc="Core received data, unblock";
+ UnblockWriteThrough, desc="Unblock because of writethrough request finishing";
+
+ StaleVicDirty, desc="Core invalidated before VicDirty processed";
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ L3DataArrayRead, desc="Read the data array";
+ L3DataArrayWrite, desc="Write the data array";
+    L3TagArrayRead, desc="Read the tag array";
+    L3TagArrayWrite, desc="Write the tag array";
+ }
+
+ // TYPES
+
+ // DirectoryEntry
+ structure(Entry, desc="...", interface="AbstractEntry") {
+ State DirectoryState, desc="Directory state";
+ DataBlock DataBlk, desc="data for the block";
+ NetDest VicDirtyIgnore, desc="VicDirty coming from whom to ignore";
+ }
+
+ structure(CacheEntry, desc="...", interface="AbstractCacheEntry") {
+ DataBlock DataBlk, desc="data for the block";
+ MachineID LastSender, desc="Mach which this block came from";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block";
+ bool Dirty, desc="Is the data dirty?";
+ int NumPendingAcks, desc="num acks expected";
+ MachineID OriginalRequestor, desc="Original Requestor";
+ MachineID WTRequestor, desc="WT Requestor";
+ bool Cached, desc="data hit in Cache";
+ bool MemData, desc="Got MemData?",default="false";
+ bool wtData, desc="Got write through data?",default="false";
+ bool atomicData, desc="Got Atomic op?",default="false";
+ Cycles InitialRequestTime, desc="...";
+ Cycles ForwardRequestTime, desc="...";
+ Cycles ProbeRequestStartTime, desc="...";
+ MachineID LastSender, desc="Mach which this block came from";
+ bool L3Hit, default="false", desc="Was this an L3 hit?";
+ uint64_t probe_id, desc="probe id for lifetime profiling";
+ WriteMask writeMask, desc="outstanding write through mask";
+ }
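+  // wtData/atomicData select the write-through and atomic paths in the
+  // actions below; writeMask records which bytes of the line the write
+  // through actually touched.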
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs";
+
+ int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ void set_tbe(TBE a);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ Entry getDirectoryEntry(Addr addr), return_by_pointer="yes" {
+ Entry dir_entry := static_cast(Entry, "pointer", directory.lookup(addr));
+
+ if (is_valid(dir_entry)) {
+ return dir_entry;
+ }
+
+ dir_entry := static_cast(Entry, "pointer",
+ directory.allocate(addr, new Entry));
+ return dir_entry;
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ TBE tbe := TBEs.lookup(addr);
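+    // once tbe.MemData is set, the TBE copy takes precedence over the
+    // directory copy so callers observe the in-flight data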
+ if (is_valid(tbe) && tbe.MemData) {
+ DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe);
+ return tbe.DataBlk;
+ }
+ DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr));
+ return getDirectoryEntry(addr).DataBlk;
+ }
+
+ State getState(TBE tbe, CacheEntry entry, Addr addr) {
+ return getDirectoryEntry(addr).DirectoryState;
+ }
+
+ void setState(TBE tbe, CacheEntry entry, Addr addr, State state) {
+ getDirectoryEntry(addr).DirectoryState := state;
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+    if (is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+    if (is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes
+ + functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+    // For this Directory, all permissions are tracked in the Directory;
+    // since it's not possible to have something in the TBE but not the Dir,
+    // state is kept in one place.
+ if (directory.isPresent(addr)) {
+ return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void setAccessPermission(CacheEntry entry, Addr addr, State state) {
+ getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state));
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:L3DataArrayRead) {
+ L3CacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L3DataArrayWrite) {
+ L3CacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L3TagArrayRead) {
+ L3CacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L3TagArrayWrite) {
+ L3CacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:L3DataArrayRead) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L3DataArrayWrite) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L3TagArrayRead) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L3TagArrayWrite) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+ // ** OUT_PORTS **
+ out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore);
+ out_port(responseNetwork_out, ResponseMsg, responseToCore);
+
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+ out_port(L3TriggerQueue_out, TriggerMsg, L3triggerQueue);
+
+ // ** IN_PORTS **
+
+ // Trigger Queue
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=5) {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == TriggerType:AcksComplete) {
+ trigger(Event:ProbeAcksComplete, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == TriggerType:UnblockWriteThrough) {
+ trigger(Event:UnblockWriteThrough, in_msg.addr, entry, tbe);
+ } else {
+ error("Unknown trigger msg");
+ }
+ }
+ }
+ }
+
+ in_port(L3TriggerQueue_in, TriggerMsg, L3triggerQueue, rank=4) {
+ if (L3TriggerQueue_in.isReady(clockEdge())) {
+ peek(L3TriggerQueue_in, TriggerMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == TriggerType:L3Hit) {
+ trigger(Event:L3Hit, in_msg.addr, entry, tbe);
+ } else {
+ error("Unknown trigger msg");
+ }
+ }
+ }
+ }
+
+ // Unblock Network
+ in_port(unblockNetwork_in, UnblockMsg, unblockFromCores, rank=3) {
+ if (unblockNetwork_in.isReady(clockEdge())) {
+ peek(unblockNetwork_in, UnblockMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ trigger(Event:CoreUnblock, in_msg.addr, entry, tbe);
+ }
+ }
+ }
+
+ // Core response network
+ in_port(responseNetwork_in, ResponseMsg, responseFromCores, rank=2) {
+ if (responseNetwork_in.isReady(clockEdge())) {
+ peek(responseNetwork_in, ResponseMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+ trigger(Event:CPUPrbResp, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceResponseType:CPUData) {
+ trigger(Event:CPUData, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
+ trigger(Event:StaleWB, in_msg.addr, entry, tbe);
+ } else {
+ error("Unexpected response type");
+ }
+ }
+ }
+ }
+
+ // off-chip memory request/response is done
+ in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=1) {
+ if (memQueue_in.isReady(clockEdge())) {
+ peek(memQueue_in, MemoryMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
+ trigger(Event:MemData, in_msg.addr, entry, tbe);
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
+ trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them.
+ } else {
+ DPRINTF(RubySlicc, "%s\n", in_msg.Type);
+ error("Invalid message");
+ }
+ }
+ }
+ }
+
+ in_port(requestNetwork_in, CPURequestMsg, requestFromCores, rank=0) {
+ if (requestNetwork_in.isReady(clockEdge())) {
+ peek(requestNetwork_in, CPURequestMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == CoherenceRequestType:RdBlk) {
+ trigger(Event:RdBlk, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+ trigger(Event:RdBlkS, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+ trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+ trigger(Event:Atomic, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+ if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+ DPRINTF(RubySlicc, "Dropping VicDirty for address %s\n", in_msg.addr);
+ trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "Got VicDirty from %s on %s\n", in_msg.Requestor, in_msg.addr);
+ trigger(Event:VicDirty, in_msg.addr, entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+ if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+ DPRINTF(RubySlicc, "Dropping VicClean for address %s\n", in_msg.addr);
+ trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr);
+ trigger(Event:VicClean, in_msg.addr, entry, tbe);
+ }
+ } else {
+ error("Bad request message type");
+ }
+ }
+ }
+ }
+
+ // Actions
+ action(s_sendResponseS, "s", desc="send Shared response") {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Shared;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(es_sendResponseES, "es", desc="send Exclusive or Shared response") {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Cached) {
+ out_msg.State := CoherenceState:Shared;
+ } else {
+ out_msg.State := CoherenceState:Exclusive;
+ }
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(m_sendResponseM, "m", desc="send Modified response") {
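+    // Write throughs get no data response here; the directory unblocks
+    // itself via the UnblockWriteThrough trigger. Atomics get the data
+    // response and then the same self-unblock.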
+ if (tbe.wtData) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:UnblockWriteThrough;
+ }
+    } else {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.State := CoherenceState:Modified;
+ out_msg.CtoD := false;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+        if (tbe.atomicData) {
+ out_msg.WTRequestor := tbe.WTRequestor;
+ }
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ if (tbe.atomicData) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:UnblockWriteThrough;
+ }
+ }
+ }
+ }
+
+ action(c_sendResponseCtoD, "c", desc="send CtoD Ack") {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Modified;
+ out_msg.CtoD := true;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysWBAck;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(l_queueMemWBReq, "lq", desc="Write WB data to memory") {
+ peek(responseNetwork_in, ResponseMsg) {
+ queueMemoryWrite(machineID, address, to_memory_controller_latency,
+ in_msg.DataBlk);
+ }
+ }
+
+ action(l_queueMemRdReq, "lr", desc="Read data from memory") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ if (L3CacheMemory.isTagPresent(address)) {
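+        // L3 hit: move the block into the TBE, free the L3 entry, and let
+        // the L3Hit trigger stand in for the memory response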
+ enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ if (tbe.Dirty == false) {
+ tbe.DataBlk := entry.DataBlk;
+ }
+ tbe.LastSender := entry.LastSender;
+ tbe.L3Hit := true;
+ tbe.MemData := true;
+ L3CacheMemory.deallocate(address);
+ } else {
+ queueMemoryRead(machineID, address, to_memory_controller_latency);
+ }
+ }
+ }
+
+ action(dc_probeInvCoreData, "dc", desc="probe inv cores, return data") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket
+
+ // add relevant TCC node to list. This replaces all TCPs and SQCs
+ if (((in_msg.Type == CoherenceRequestType:WriteThrough ||
+ in_msg.Type == CoherenceRequestType:Atomic) &&
+ in_msg.NoWriteConflict) ||
+ CPUonly) {
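+          // nothing to add: no TCC/TCCdir probe is needed for
+          // conflict-free GPU write throughs/atomics or when CPUonly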
+ } else if (noTCCdir) {
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ } else {
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ }
+ out_msg.Destination.remove(in_msg.Requestor);
+ tbe.NumPendingAcks := out_msg.Destination.count();
+ if (tbe.NumPendingAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ }
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ APPEND_TRANSITION_COMMENT(" dc: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") {
+ peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbDowngrade;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket
+ // add relevant TCC node to the list. This replaces all TCPs and SQCs
+ if (noTCCdir || CPUonly) {
+ //Don't need to notify TCC about reads
+ } else {
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:TCCdir,
+ TCC_select_low_bit, TCC_select_num_bits));
+ tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
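+          // note: this increment is overwritten by the unconditional
+          // Destination.count() assignment below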
+ }
+ if (noTCCdir && !CPUonly) {
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ }
+ out_msg.Destination.remove(in_msg.Requestor);
+ tbe.NumPendingAcks := out_msg.Destination.count();
+ if (tbe.NumPendingAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ }
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+ APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(ic_probeInvCore, "ic", desc="probe invalidate core, no return data needed") {
+ peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := false;
+ out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket
+
+ // add relevant TCC node to the list. This replaces all TCPs and SQCs
+ if (noTCCdir && !CPUonly) {
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ } else {
+ if (!noTCCdir) {
+ out_msg.Destination.add(mapAddressToRange(address,
+ MachineType:TCCdir,
+ TCC_select_low_bit,
+ TCC_select_num_bits));
+ }
+ }
+ out_msg.Destination.remove(in_msg.Requestor);
+ tbe.NumPendingAcks := out_msg.Destination.count();
+ if (tbe.NumPendingAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ }
+ APPEND_TRANSITION_COMMENT(" ic: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(d_writeDataToMemory, "d", desc="Write data to memory") {
+ peek(responseNetwork_in, ResponseMsg) {
+ getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
+ if (tbe.Dirty == false) {
+ // have to update the TBE, too, because of how this
+ // directory deals with functional writes
+ tbe.DataBlk := in_msg.DataBlk;
+ }
+ }
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ peek(requestNetwork_in, CPURequestMsg) {
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ tbe.writeMask.clear();
+ tbe.writeMask.orMask(in_msg.writeMask);
+ tbe.wtData := true;
+ tbe.WTRequestor := in_msg.WTRequestor;
+ tbe.LastSender := in_msg.Requestor;
+ }
+ if (in_msg.Type == CoherenceRequestType:Atomic) {
+ tbe.writeMask.clear();
+ tbe.writeMask.orMask(in_msg.writeMask);
+ tbe.atomicData := true;
+ tbe.WTRequestor := in_msg.WTRequestor;
+ tbe.LastSender := in_msg.Requestor;
+ }
+ tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
+ tbe.Dirty := false;
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ tbe.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
+ tbe.Dirty := true;
+ }
+ tbe.OriginalRequestor := in_msg.Requestor;
+ tbe.NumPendingAcks := 0;
+ tbe.Cached := in_msg.ForceShared;
+ tbe.InitialRequestTime := in_msg.InitialRequestTime;
+ }
+ }
+
+ action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+ if (tbe.Dirty == false) {
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ }
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(wd_writeBackData, "wd", desc="Write back data if needed") {
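+    // Write throughs merge only the masked bytes into the directory copy;
+    // atomics apply the op against the directory copy; otherwise a clean
+    // block is simply refreshed from the TBE.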
+ if (tbe.wtData) {
+ getDirectoryEntry(address).DataBlk.copyPartial(tbe.DataBlk, tbe.writeMask);
+ } else if (tbe.atomicData) {
+ tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask);
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ } else if (tbe.Dirty == false) {
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ }
+ }
+
+ action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") {
+ peek(memQueue_in, MemoryMsg) {
+ if (tbe.wtData == true) {
+ // do nothing
+ } else if (tbe.Dirty == false) {
+ tbe.DataBlk := getDirectoryEntry(address).DataBlk;
+ }
+ tbe.MemData := true;
+ }
+ }
+
+ action(y_writeProbeDataToTBE, "y", desc="write Probe Data to TBE") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (in_msg.Dirty) {
+ if (tbe.wtData) {
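+        // dirty probe data becomes the base of the line, the write-through
+        // bytes are overlaid on top, and the mask is filled so the whole
+        // merged line is written back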
+ DataBlock tmp := in_msg.DataBlk;
+ tmp.copyPartial(tbe.DataBlk,tbe.writeMask);
+ tbe.DataBlk := tmp;
+ tbe.writeMask.fillMask();
+ } else if (tbe.Dirty) {
+          if (tbe.atomicData == false && tbe.wtData == false) {
+ DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
+ assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data
+ }
+ } else {
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.Dirty := in_msg.Dirty;
+ tbe.LastSender := in_msg.Sender;
+ }
+ }
+ if (in_msg.Hit) {
+ tbe.Cached := true;
+ }
+ }
+ }
+
+ action(mwc_markSinkWriteCancel, "mwc", desc="Mark to sink impending VicDirty") {
+ peek(responseNetwork_in, ResponseMsg) {
+ getDirectoryEntry(address).VicDirtyIgnore.add(in_msg.Sender);
+ APPEND_TRANSITION_COMMENT(" setting bit to sink VicDirty ");
+ }
+ }
+
+ action(x_decrementAcks, "x", desc="decrement Acks pending") {
+ tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+ APPEND_TRANSITION_COMMENT(" Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ }
+
+ action(o_checkForCompletion, "o", desc="check for ack completion") {
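+    // invoked after x_decrementAcks in the CPUPrbResp transitions, so
+    // AcksComplete fires as soon as the final probe response arrives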
+ if (tbe.NumPendingAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ }
+ APPEND_TRANSITION_COMMENT(" Check: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ }
+
+ action(rv_removeVicDirtyIgnore, "rv", desc="Remove ignored core") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor);
+ }
+ }
+
+ action(al_allocateL3Block, "al", desc="allocate the L3 block on WB") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+ entry.DataBlk := in_msg.DataBlk;
+ entry.LastSender := in_msg.Sender;
+ } else {
+ if (L3CacheMemory.cacheAvail(address) == false) {
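+        // set is full: write the victim back to memory and free it before
+        // allocating the new block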
+ Addr victim := L3CacheMemory.cacheProbe(address);
+ CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+ L3CacheMemory.lookup(victim));
+ queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+ victim_entry.DataBlk);
+ L3CacheMemory.deallocate(victim);
+ }
+ assert(L3CacheMemory.cacheAvail(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+ entry.DataBlk := in_msg.DataBlk;
+
+ entry.LastSender := in_msg.Sender;
+ }
+ }
+ }
+
+ action(alwt_allocateL3BlockOnWT, "alwt", desc="allocate the L3 block on WT") {
+ if ((tbe.wtData || tbe.atomicData) && useL3OnWT) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+ entry.DataBlk := tbe.DataBlk;
+ entry.LastSender := tbe.LastSender;
+ } else {
+ if (L3CacheMemory.cacheAvail(address) == false) {
+ Addr victim := L3CacheMemory.cacheProbe(address);
+ CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+ L3CacheMemory.lookup(victim));
+ queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+ victim_entry.DataBlk);
+ L3CacheMemory.deallocate(victim);
+ }
+ assert(L3CacheMemory.cacheAvail(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+ entry.DataBlk := tbe.DataBlk;
+ entry.LastSender := tbe.LastSender;
+ }
+ }
+ }
+
+  action(sf_setForwardReqTime, "sf", desc="record the time the dir forwarded the request") {
+ tbe.ForwardRequestTime := curCycle();
+ }
+
+ action(dl_deallocateL3, "dl", desc="deallocate the L3 block") {
+ L3CacheMemory.deallocate(address);
+ }
+
+ action(p_popRequestQueue, "p", desc="pop request queue") {
+ requestNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="pop response queue") {
+ responseNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pm_popMemQueue, "pm", desc="pop mem queue") {
+ memQueue_in.dequeue(clockEdge());
+ }
+
+ action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
+ triggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(ptl_popTriggerQueue, "ptl", desc="pop L3 trigger queue") {
+ L3TriggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(pu_popUnblockQueue, "pu", desc="pop unblock queue") {
+ unblockNetwork_in.dequeue(clockEdge());
+ }
+
+ action(zz_recycleRequestQueue, "zz", desc="recycle request queue") {
+ requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(yy_recycleResponseQueue, "yy", desc="recycle response queue") {
+ responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") {
+ stall_and_wait(requestNetwork_in, address);
+ }
+
+ action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") {
+ wakeUpBuffers(address);
+ }
+
+  action(wa_wakeUpAllDependents, "waa", desc="Wake up all waiting requests") {
+ wakeUpAllBuffers();
+ }
+
+  action(z_stall, "z", desc="stall") {
+ }
+
+ // TRANSITIONS
+ transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
+ st_stallAndWaitRequest;
+ }
+
+ // It may be possible to save multiple invalidations here!
+ transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {Atomic, WriteThrough}) {
+ st_stallAndWaitRequest;
+ }
+
+
+ // transitions from U
+ transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead} {
+ t_allocateTBE;
+ l_queueMemRdReq;
+ sc_probeShrCoreData;
+ p_popRequestQueue;
+ }
+
+ transition(U, WriteThrough, BM_PM) {L3TagArrayRead, L3TagArrayWrite} {
+ t_allocateTBE;
+ w_sendResponseWBAck;
+ l_queueMemRdReq;
+ dc_probeInvCoreData;
+ p_popRequestQueue;
+ }
+
+ transition(U, Atomic, BM_PM) {L3TagArrayRead, L3TagArrayWrite} {
+ t_allocateTBE;
+ l_queueMemRdReq;
+ dc_probeInvCoreData;
+ p_popRequestQueue;
+ }
+
+ transition(U, {RdBlkM}, BM_PM) {L3TagArrayRead} {
+ t_allocateTBE;
+ l_queueMemRdReq;
+ dc_probeInvCoreData;
+ p_popRequestQueue;
+ }
+
+ transition(U, RdBlk, B_PM) {L3TagArrayRead}{
+ t_allocateTBE;
+ l_queueMemRdReq;
+ sc_probeShrCoreData;
+ p_popRequestQueue;
+ }
+
+ transition(U, CtoD, BP) {L3TagArrayRead} {
+ t_allocateTBE;
+ ic_probeInvCore;
+ p_popRequestQueue;
+ }
+
+ transition(U, VicDirty, BL) {L3TagArrayRead} {
+ t_allocateTBE;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(U, VicClean, BL) {L3TagArrayRead} {
+ t_allocateTBE;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition(BL, {VicDirty, VicClean}) {
+ zz_recycleRequestQueue;
+ }
+
+ transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} {
+ d_writeDataToMemory;
+ al_allocateL3Block;
+ wa_wakeUpDependents;
+ dt_deallocateTBE;
+ pr_popResponseQueue;
+ }
+
+ transition(BL, StaleWB, U) {L3TagArrayWrite} {
+ dt_deallocateTBE;
+ wa_wakeUpAllDependents;
+ pr_popResponseQueue;
+ }
+
+ transition({B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
+ z_stall;
+ }
+
+ transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, WBAck) {
+ pm_popMemQueue;
+ }
+
+ transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, StaleVicDirty) {
+ rv_removeVicDirtyIgnore;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition({B}, CoreUnblock, U) {
+ wa_wakeUpDependents;
+ pu_popUnblockQueue;
+ }
+
+ transition(B, UnblockWriteThrough, U) {
+ wa_wakeUpDependents;
+ pt_popTriggerQueue;
+ }
+
+ transition(BS_PM, MemData, BS_Pm) {} {
+ mt_writeMemDataToTBE;
+ pm_popMemQueue;
+ }
+
+ transition(BM_PM, MemData, BM_Pm){} {
+ mt_writeMemDataToTBE;
+ pm_popMemQueue;
+ }
+
+ transition(B_PM, MemData, B_Pm){} {
+ mt_writeMemDataToTBE;
+ pm_popMemQueue;
+ }
+
+ transition(BS_PM, L3Hit, BS_Pm) {} {
+ ptl_popTriggerQueue;
+ }
+
+ transition(BM_PM, L3Hit, BM_Pm) {} {
+ ptl_popTriggerQueue;
+ }
+
+ transition(B_PM, L3Hit, B_Pm) {} {
+ ptl_popTriggerQueue;
+ }
+
+ transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ mt_writeMemDataToTBE;
+ s_sendResponseS;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pm_popMemQueue;
+ }
+
+ transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ mt_writeMemDataToTBE;
+ m_sendResponseM;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pm_popMemQueue;
+ }
+
+ transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ mt_writeMemDataToTBE;
+ es_sendResponseES;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pm_popMemQueue;
+ }
+
+ transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+ s_sendResponseS;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ ptl_popTriggerQueue;
+ }
+
+ transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+ m_sendResponseM;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ ptl_popTriggerQueue;
+ }
+
+ transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+ es_sendResponseES;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ ptl_popTriggerQueue;
+ }
+
+ transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BP}, CPUPrbResp) {
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ o_checkForCompletion;
+ pr_popResponseQueue;
+ }
+
+ transition(BS_PM, ProbeAcksComplete, BS_M) {} {
+ sf_setForwardReqTime;
+ pt_popTriggerQueue;
+ }
+
+ transition(BM_PM, ProbeAcksComplete, BM_M) {} {
+ sf_setForwardReqTime;
+ pt_popTriggerQueue;
+ }
+
+ transition(B_PM, ProbeAcksComplete, B_M){} {
+ sf_setForwardReqTime;
+ pt_popTriggerQueue;
+ }
+
+ transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+ sf_setForwardReqTime;
+ s_sendResponseS;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+ sf_setForwardReqTime;
+ m_sendResponseM;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+ sf_setForwardReqTime;
+ es_sendResponseES;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+  transition(BP, ProbeAcksComplete, B){L3TagArrayWrite} {
+ sf_setForwardReqTime;
+ c_sendResponseCtoD;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base-msg.sm b/src/mem/protocol/MOESI_AMD_Base-msg.sm
new file mode 100644
index 000000000..ff8842369
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-msg.sm
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+
+enumeration(CoherenceRequestType, desc="Coherence Request Types") {
+ // CPU Request Types ONLY
+ RdBlk, desc="Read Blk";
+ RdBlkM, desc="Read Blk Modified";
+ RdBlkS, desc="Read Blk Shared";
+ CtoD, desc="Change To Dirty";
+ VicClean, desc="L2 clean eviction";
+ VicDirty, desc="L2 dirty eviction";
+ Atomic, desc="Upper level atomic";
+  AtomicWriteBack, desc="Upper level atomic writeback";
+ WriteThrough, desc="Ordered WriteThrough w/Data";
+ WriteThroughFifo, desc="WriteThrough with no data";
+ WriteThroughDummy, desc="WriteThrough with no data for atomic operation";
+ WriteFlush, desc="Release Flush";
+
+ WrCancel, desc="want to cancel WB to Memory"; // should this be here?
+
+ WBApproval, desc="WB Approval";
+
+ // Messages between Dir and R-Dir
+  ForceInv, desc="Send invalidate to the block";
+ ForceDowngrade, desc="Send downgrade to the block";
+ Unblock, desc="Used to let the dir know a message has been sunk";
+
+ // Messages between R-Dir and R-Buffer
+ PrivateNotify, desc="Let region buffer know it has private access";
+ SharedNotify, desc="Let region buffer know it has shared access";
+ WbNotify, desc="Let region buffer know it saw its wb request";
+ Downgrade, desc="Force the region buffer to downgrade to shared";
+ // Response to R-Dir (probably should be on a different network, but
+ // I need it to be ordered with respect to requests)
+  InvAck, desc="Let the R-Dir know when the inv has occurred";
+
+ PrivateRequest, desc="R-buf wants the region in private";
+  UpgradeRequest, desc="R-buf wants to upgrade the region to private";
+ SharedRequest, desc="R-buf wants the region in shared (could respond with private)";
+ CleanWbRequest, desc="R-buf wants to deallocate clean region";
+
+ NA, desc="So we don't get segfaults";
+}
+
+enumeration(ProbeRequestType, desc="Probe Request Types") {
+ PrbDowngrade, desc="Probe for Status"; // EtoS, MtoO, StoS
+ PrbInv, desc="Probe to Invalidate";
+
+ // For regions
+ PrbRepl, desc="Force the cache to do a replacement";
+ PrbRegDowngrade, desc="Probe for Status"; // EtoS, MtoO, StoS
+ PrbAtomic, desc="Forwarded Atomic Operation";
+}
+
+
+enumeration(CoherenceResponseType, desc="Coherence Response Types") {
+ NBSysResp, desc="Northbridge response to CPU Rd request";
+ NBSysWBAck, desc="Northbridge response ok to WB";
+ TDSysResp, desc="TCCdirectory response to CPU Rd request";
+ TDSysWBAck, desc="TCCdirectory response ok to WB";
+ TDSysWBNack, desc="TCCdirectory response ok to drop";
+ CPUPrbResp, desc="CPU Probe Response";
+ CPUData, desc="CPU Data";
+ StaleNotif, desc="Notification of Stale WBAck, No data to writeback";
+ CPUCancelWB, desc="want to cancel WB to Memory";
+ MemData, desc="Data from Memory";
+
+ // for regions
+ PrivateAck, desc="Ack that r-buf received private notify";
+ RegionWbAck, desc="Writeback Ack that r-buf completed deallocation";
+ DirReadyAck, desc="Directory (mem ctrl)<->region dir handshake";
+}
+
+enumeration(CoherenceState, default="CoherenceState_NA", desc="Coherence State") {
+ Modified, desc="Modified";
+ Owned, desc="Owned state";
+ Exclusive, desc="Exclusive";
+ Shared, desc="Shared";
+ NA, desc="NA";
+}
+
+structure(CPURequestMsg, desc="...", interface="Message") {
+ Addr addr, desc="Physical address for this request";
+ Addr DemandAddress, desc="Physical block address for this request";
+ CoherenceRequestType Type, desc="Type of request";
+ DataBlock DataBlk, desc="data for the cache line"; // only for WB
+ bool Dirty, desc="whether WB data is dirty"; // only for WB
+ MachineID Requestor, desc="Node who initiated the request";
+ NetDest Destination, desc="Multicast destination mask";
+ bool Shared, desc="For CPU_WrVicBlk, vic is O not M. For CPU_ClVicBlk, vic is S";
+ MessageSizeType MessageSize, desc="size category of the message";
+  Cycles InitialRequestTime, desc="time the initial request was sent from the L1Cache";
+ Cycles ForwardRequestTime, desc="time the dir forwarded the request";
+ Cycles ProbeRequestStartTime, desc="the time the dir started the probe request";
+ bool DemandRequest, default="false", desc="For profiling purposes";
+
+ NetDest Sharers, desc="Caches that may have a valid copy of the data";
+ bool ForceShared, desc="R-dir knows it is shared, pass on so it sends an S copy, not E";
+ bool Private, default="false", desc="Requestor already has private permissions, no need for dir check";
+ bool CtoDSinked, default="false", desc="This is true if the CtoD previously sent must have been sunk";
+
+ bool NoAckNeeded, default="false", desc="True if region buffer doesn't need to ack";
+ int Acks, default="0", desc="Acks that the dir (mem ctrl) should expect to receive";
+ CoherenceRequestType OriginalType, default="CoherenceRequestType_NA", desc="Type of request from core fwded through region buffer";
+ WriteMask writeMask, desc="Write Through Data";
+ MachineID WTRequestor, desc="Node who initiated the write through";
+ HSAScope scope, default="HSAScope_SYSTEM", desc="Request Scope";
+ int wfid, default="0", desc="wavefront id";
+ bool NoWriteConflict, default="true", desc="write collided with CAB entry";
+  int ProgramCounter, desc="PC that accesses this block";
+
+ bool functionalRead(Packet *pkt) {
+    // Only VicDirty (dirty writeback) messages contain the data block
+ if (Type == CoherenceRequestType:VicDirty) {
+ return testAndRead(addr, DataBlk, pkt);
+ }
+
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // No check on message type required; a functional write should
+ // update any message that carries the block
+ return testAndWrite(addr, DataBlk, pkt);
+ }
+}
+
+structure(NBProbeRequestMsg, desc="...", interface="Message") {
+ Addr addr, desc="Physical address for this request";
+ ProbeRequestType Type, desc="NB_PrbNxtState signal";
+ bool ReturnData, desc="Indicates CPU should return data";
+ NetDest Destination, desc="Node to whom the data is sent";
+ MessageSizeType MessageSize, desc="size category of the message";
+ bool DemandRequest, default="false", desc="demand request, requesting 3-hop transfer";
+ Addr DemandAddress, desc="Demand block address for a region request";
+ MachineID Requestor, desc="Requestor id for 3-hop requests";
+ bool NoAckNeeded, default="false", desc="For short-circuiting acks";
+ int ProgramCounter, desc="PC that accesses this block";
+
+ bool functionalRead(Packet *pkt) {
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // Probe requests carry no data block, so there is nothing to
+ // update functionally
+ return false;
+ }
+
+}
+
+structure(TDProbeRequestMsg, desc="...", interface="Message") {
+ Addr addr, desc="Physical address for this request";
+ ProbeRequestType Type, desc="TD_PrbNxtState signal";
+ bool ReturnData, desc="Indicates CPU should return data";
+ bool localCtoD, desc="Indicates CtoD is within the GPU hierarchy (aka TCC subtree)";
+ NetDest Destination, desc="Node to whom the data is sent";
+ MessageSizeType MessageSize, desc="size category of the message";
+ int Phase, desc="Synchronization Phase";
+ int wfid, desc="wavefront id for Release";
+ MachineID Requestor, desc="Node who initiated the request";
+
+ bool functionalRead(Packet *pkt) {
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // Probe requests carry no data block, so there is nothing to
+ // update functionally
+ return false;
+ }
+}
+
+// Response messages are similar enough to be munged into one type
+structure(ResponseMsg, desc="...", interface="Message") {
+ Addr addr, desc="Physical address for this request";
+ CoherenceResponseType Type, desc="NB Sys Resp or CPU Response to Probe";
+ MachineID Sender, desc="Node who sent the data";
+ NetDest Destination, desc="Node to whom the data is sent";
+ // Begin Used Only By CPU Response
+ DataBlock DataBlk, desc="data for the cache line";
+ bool Hit, desc="probe hit valid line";
+ bool Shared, desc="True if S, or if NB Probe ReturnData==1 && O";
+ bool Dirty, desc="Is the data dirty (different than memory)?";
+ bool Ntsl, desc="indicates the probed line will be invalid after the probe";
+ bool UntransferredOwner, desc="pending confirmation of ownership change";
+ // End Used Only By CPU Response
+
+ // Begin NB Response Only
+ CoherenceState State, default=CoherenceState_NA, desc="What returned data from NB should be in";
+ bool CtoD, desc="was the originator a CtoD?";
+ // End NB Response Only
+
+ // Normally if a block gets hit by a probe while waiting to be written back,
+ // you flip the NbReqShared signal (part of the CPURequest signal group).
+ // But since this is in packets and I don't want to send a separate packet,
+ // let's just send this signal back with the data instead
+ bool NbReqShared, desc="modification of Shared field from initial request, e.g. hit by shared probe";
+
+ MessageSizeType MessageSize, desc="size category of the message";
+ Cycles InitialRequestTime, desc="time the initial request was sent from the L1Cache";
+ Cycles ForwardRequestTime, desc="time the dir forwarded the request";
+ Cycles ProbeRequestStartTime, desc="the time the dir started the probe request";
+ bool DemandRequest, default="false", desc="For profiling purposes";
+
+ bool L3Hit, default="false", desc="Did memory or L3 supply the data?";
+ MachineID OriginalResponder, desc="Mach which wrote the data to the L3";
+ MachineID WTRequestor, desc="Node who started the writethrough";
+
+ bool NotCached, default="false", desc="True when the Region buffer has already evicted the line";
+
+ bool NoAckNeeded, default="false", desc="For short-circuiting acks";
+ bool isValid, default="false", desc="Is acked block valid";
+ int wfid, default="0", desc="wavefront id";
+ int Phase, desc="Synchronization Phase";
+
+ int ProgramCounter, desc="PC that issues this request";
+ bool mispred, desc="tell TCP if the block should not be bypassed";
+
+
+ bool functionalRead(Packet *pkt) {
+ // Only CPUData and MemData messages carry the data block
+ if (Type == CoherenceResponseType:CPUData ||
+ Type == CoherenceResponseType:MemData) {
+ return testAndRead(addr, DataBlk, pkt);
+ }
+
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // No check on message type required; a functional write should
+ // update any message that carries the block
+ return testAndWrite(addr, DataBlk, pkt);
+ }
+}
+
+structure(UnblockMsg, desc="...", interface="Message") {
+ Addr addr, desc="Physical address for this request";
+ NetDest Destination, desc="Destination (always directory)";
+ MessageSizeType MessageSize, desc="size category of the message";
+ MachineID Sender, desc="Node who sent the data";
+ bool currentOwner, default="false", desc="Is the sender the current owner";
+ bool DoneAck, default="false", desc="Is this a done ack?";
+ bool Dirty, default="false", desc="Was block dirty when evicted";
+ bool wasValid, default="false", desc="Was block valid when evicted";
+ bool valid, default="false", desc="Is block valid";
+ bool validToInvalid, default="false", desc="Block transitioned from valid to invalid on eviction";
+
+ bool functionalRead(Packet *pkt) {
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // Unblock messages carry no data block, so there is nothing to
+ // update functionally
+ return false;
+ }
+}
+
+enumeration(TriggerType, desc="Trigger Type") {
+ L2_to_L1, desc="L2 to L1 fill";
+ AcksComplete, desc="NB received all needed Acks";
+
+ // For regions
+ InvNext, desc="Invalidate the next block";
+ PrivateAck, desc="Loopback ack for machines with no Region Buffer";
+ AllOutstanding, desc="All outstanding requests have finished";
+ L3Hit, desc="L3 hit in dir";
+
+ // For region directory once the directory is blocked
+ InvRegion, desc="Invalidate region";
+ DowngradeRegion, desc="downgrade region";
+ // For writethrough
+ UnblockWriteThrough, desc="unblock";
+ WriteData, desc="Write data to the full cache block";
+ WriteDone, desc="Sequencer says that write is done";
+ AtomicDone, desc="Atomic is done";
+}
+
+enumeration(CacheId, desc="Which Cache in the Core") {
+ L1I, desc="L1 I-cache";
+ L1D0, desc="L1 D-cache cluster 0";
+ L1D1, desc="L1 D-cache cluster 1";
+ NA, desc="Default";
+}
+
+structure(TriggerMsg, desc="...", interface="Message") {
+ Addr addr, desc="Address";
+ TriggerType Type, desc="Type of trigger";
+ CacheId Dest, default="CacheId_NA", desc="Cache to invalidate";
+ int ProgramCounter, desc="PC that accesses this block";
+
+ bool functionalRead(Packet *pkt) {
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // Trigger messages carry no data block, so there is nothing to
+ // update functionally
+ return false;
+ }
+
+}
+
+enumeration(FifoType, desc="Fifo Type") {
+ WriteDummy, desc="Dummy Write for atomic operation";
+ WriteThrough, desc="simple writethrough request";
+ WriteFlush, desc="synchronization message";
+}
+
+structure(FifoMsg, desc="...", interface="Message") {
+ Addr addr, desc="Address";
+ FifoType Type, desc="WriteThrough/WriteFlush";
+ int wfid, default="0", desc="wavefront id";
+ MachineID Requestor, desc="Flush Requestor";
+ MachineID oRequestor, desc="original Flush Requestor";
+
+ bool functionalRead(Packet *pkt) {
+ return false;
+ }
+
+ bool functionalWrite(Packet *pkt) {
+ // Fifo messages carry no data block, so there is nothing to
+ // update functionally
+ return false;
+ }
+
+}
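
Every message type in this file follows the same functional-access pattern:
functionalWrite unconditionally updates any message that may hold the block,
while functionalRead succeeds only for message types whose DataBlk field is
meaningful (e.g. CPUData, MemData). The C++ sketch below is a minimal,
self-contained model of that pattern; DataBlock, Packet, testAndRead() and
testAndWrite() here are simplified stand-ins, not the real gem5 interfaces.

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    struct DataBlock { uint8_t bytes[64] = {}; };

    struct Packet {
        uint64_t addr;   // byte address of the functional access
        unsigned size;   // access size in bytes
        uint8_t *data;   // buffer to read into / write from
    };

    // Copy out of the block only if the access falls entirely inside it.
    bool testAndRead(uint64_t blockAddr, DataBlock &blk, Packet *pkt)
    {
        if (pkt->addr < blockAddr || pkt->addr + pkt->size > blockAddr + 64)
            return false;
        std::memcpy(pkt->data, &blk.bytes[pkt->addr - blockAddr], pkt->size);
        return true;
    }

    // Symmetric update into the block.
    bool testAndWrite(uint64_t blockAddr, DataBlock &blk, Packet *pkt)
    {
        if (pkt->addr < blockAddr || pkt->addr + pkt->size > blockAddr + 64)
            return false;
        std::memcpy(&blk.bytes[pkt->addr - blockAddr], pkt->data, pkt->size);
        return true;
    }

    int main()
    {
        DataBlock blk;
        uint8_t v = 0xAB;
        Packet wr{0x1000, 1, &v};
        std::cout << testAndWrite(0x1000, blk, &wr) << "\n"; // prints 1
        uint8_t r = 0;
        Packet rd{0x1000, 1, &r};
        std::cout << testAndRead(0x1000, blk, &rd) << "\n";  // prints 1
        std::cout << int(r) << "\n";                         // prints 171
    }
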
diff --git a/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm b/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm
new file mode 100644
index 000000000..f545c2fa7
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm
@@ -0,0 +1,1408 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu, Sooraj Puthoor
+ */
+
+/*
+ * This file is based on MOESI_AMD_Base.sm
+ * Differences from the AMD base protocol:
+ * -- Uses a probe filter memory to track sharers.
+ * -- The probe filter can be inclusive or non-inclusive.
+ * -- Only two sharers are tracked: a) the GPU and/or b) the CPU.
+ * -- If sharer information is available, only that sharer is probed.
+ * -- If sharer information is not available, probes are broadcast.
+ */
+
+machine(MachineType:Directory, "AMD Baseline protocol")
+: DirectoryMemory * directory;
+ CacheMemory * L3CacheMemory;
+ CacheMemory * ProbeFilterMemory;
+ Cycles response_latency := 5;
+ Cycles l3_hit_latency := 50;
+ bool noTCCdir := "False";
+ bool CAB_TCC := "False";
+ int TCC_select_num_bits := 1;
+ bool useL3OnWT := "False";
+ bool inclusiveDir := "True";
+ Cycles to_memory_controller_latency := 1;
+
+ // From the Cores
+ MessageBuffer * requestFromCores, network="From", virtual_network="0", ordered="false", vnet_type="request";
+ MessageBuffer * responseFromCores, network="From", virtual_network="2", ordered="false", vnet_type="response";
+ MessageBuffer * unblockFromCores, network="From", virtual_network="4", ordered="false", vnet_type="unblock";
+
+ MessageBuffer * probeToCore, network="To", virtual_network="0", ordered="false", vnet_type="request";
+ MessageBuffer * responseToCore, network="To", virtual_network="2", ordered="false", vnet_type="response";
+
+ MessageBuffer * triggerQueue, ordered="true";
+ MessageBuffer * L3triggerQueue, ordered="true";
+ MessageBuffer * responseFromMemory;
+{
+ // STATES
+ state_declaration(State, desc="Directory states", default="Directory_State_U") {
+ U, AccessPermission:Backing_Store, desc="unblocked";
+ BL, AccessPermission:Busy, desc="got L3 WB request";
+ // BL is Busy because it is busy waiting for the data
+ // which is possibly in the network. The cache which evicted the data
+ // might have moved to some other state after doing the eviction
+ // BS==> Received a read request; has not requested ownership
+ // B==> Received a read request; has requested ownership
+ // BM==> Received a modification request
+ B_P, AccessPermission:Backing_Store, desc="Back invalidation, waiting for probes";
+ BS_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
+ BM_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
+ B_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
+ BP, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory";
+ BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
+ BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
+ B_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
+ BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
+ B, AccessPermission:Backing_Store, desc="sent response, Blocked til ack";
+ }
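+
+ // State-name suffixes encode what is still outstanding: "_PM" states wait
+ // on probes and memory, "_Pm" states wait on probes only (memory data is
+ // already in the TBE), and "_M" states wait on memory only (probes done).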
+
+ // Events
+ enumeration(Event, desc="Directory events") {
+ // CPU requests
+ RdBlkS, desc="Read block, requesting Shared";
+ RdBlkM, desc="Read block, requesting Modified";
+ RdBlk, desc="Read block";
+ CtoD, desc="Change to Dirty (upgrade, no data needed)";
+ WriteThrough, desc="WriteThrough Message";
+ Atomic, desc="Atomic Message";
+
+ // writebacks
+ VicDirty, desc="Victim block writeback, dirty data";
+ VicClean, desc="Victim block writeback, clean data";
+ CPUData, desc="WB data from CPU";
+ StaleWB, desc="Notification that WB has been superseded by a probe";
+
+ // probe responses
+ CPUPrbResp, desc="Probe Response Msg";
+
+ ProbeAcksComplete, desc="Probe Acks Complete";
+
+ L3Hit, desc="Hit in L3 return data to core";
+
+ // Replacement
+ PF_Repl, desc="Replace address from probe filter";
+
+ // Memory Controller
+ MemData, desc="Fetched data from memory arrives";
+ WBAck, desc="Writeback Ack from memory arrives";
+
+ CoreUnblock, desc="Core received data, unblock";
+ UnblockWriteThrough, desc="Unblock because of writethrough request finishing";
+
+ StaleVicDirty, desc="Core invalidated before VicDirty processed";
+ }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+ L3DataArrayRead, desc="Read the data array";
+ L3DataArrayWrite, desc="Write the data array";
+ L3TagArrayRead, desc="Read the tag array";
+ L3TagArrayWrite, desc="Write the tag array";
+
+ PFTagArrayRead, desc="Read the probe filter tag array";
+ PFTagArrayWrite, desc="Write the probe filter tag array";
+ }
+
+ // TYPES
+
+ enumeration(ProbeFilterState, desc="") {
+ T, desc="Tracked";
+ NT, desc="Not tracked";
+ B, desc="Blocked, this entry is being replaced";
+ }
+
+ // DirectoryEntry
+ structure(Entry, desc="...", interface="AbstractEntry") {
+ State DirectoryState, desc="Directory state";
+ DataBlock DataBlk, desc="data for the block";
+ NetDest VicDirtyIgnore, desc="VicDirty coming from whom to ignore";
+ }
+
+ structure(CacheEntry, desc="...", interface="AbstractCacheEntry") {
+ DataBlock DataBlk, desc="data for the block";
+ MachineID LastSender, desc="Mach which this block came from";
+ ProbeFilterState pfState, desc="ProbeFilter state", default="Directory_ProbeFilterState_NT";
+ bool isOnCPU, desc="Block valid in the CPU complex", default="false";
+ bool isOnGPU, desc="Block valid in the GPU complex", default="false";
+ }
+
+ structure(TBE, desc="...") {
+ State TBEState, desc="Transient state";
+ DataBlock DataBlk, desc="data for the block";
+ bool Dirty, desc="Is the data dirty?";
+ int NumPendingAcks, desc="num acks expected";
+ MachineID OriginalRequestor, desc="Original Requestor";
+ MachineID WTRequestor, desc="WT Requestor";
+ bool Cached, desc="data hit in Cache";
+ bool MemData, desc="Got MemData?", default="false";
+ bool wtData, desc="Got write through data?", default="false";
+ bool atomicData, desc="Got Atomic op?", default="false";
+ Cycles InitialRequestTime, desc="...";
+ Cycles ForwardRequestTime, desc="...";
+ Cycles ProbeRequestStartTime, desc="...";
+ MachineID LastSender, desc="Mach which this block came from";
+ bool L3Hit, default="false", desc="Was this an L3 hit?";
+ uint64_t probe_id, desc="probe id for lifetime profiling";
+ WriteMask writeMask, desc="outstanding write through mask";
+ Addr demandAddress, desc="Address of demand request which caused probe filter eviction";
+ }
+
+ structure(TBETable, external="yes") {
+ TBE lookup(Addr);
+ void allocate(Addr);
+ void deallocate(Addr);
+ bool isPresent(Addr);
+ }
+
+ TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs";
+
+ int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+ Tick clockEdge();
+ Tick cyclesToTicks(Cycles c);
+
+ void set_tbe(TBE a);
+ void unset_tbe();
+ void wakeUpAllBuffers();
+ void wakeUpBuffers(Addr a);
+ Cycles curCycle();
+
+ Entry getDirectoryEntry(Addr addr), return_by_pointer="yes" {
+ Entry dir_entry := static_cast(Entry, "pointer", directory.lookup(addr));
+
+ if (is_valid(dir_entry)) {
+ //DPRINTF(RubySlicc, "Getting entry %s: %s\n", addr, dir_entry.DataBlk);
+ return dir_entry;
+ }
+
+ dir_entry := static_cast(Entry, "pointer",
+ directory.allocate(addr, new Entry));
+ return dir_entry;
+ }
+
+ DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+ TBE tbe := TBEs.lookup(addr);
+ if (is_valid(tbe) && tbe.MemData) {
+ DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe);
+ return tbe.DataBlk;
+ }
+ DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr));
+ return getDirectoryEntry(addr).DataBlk;
+ }
+
+ State getState(TBE tbe, CacheEntry entry, Addr addr) {
+ CacheEntry probeFilterEntry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(addr));
+ if (inclusiveDir) {
+ if (is_valid(probeFilterEntry) && probeFilterEntry.pfState == ProbeFilterState:B) {
+ return State:B_P;
+ }
+ }
+ return getDirectoryEntry(addr).DirectoryState;
+ }
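+
+ // Note: when inclusiveDir is set, a probe filter entry in state B
+ // overrides the stored directory state, so conflicting requests see
+ // B_P and stall until the back probe for the replacement completes.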
+
+ void setState(TBE tbe, CacheEntry entry, Addr addr, State state) {
+ getDirectoryEntry(addr).DirectoryState := state;
+ }
+
+ void functionalRead(Addr addr, Packet *pkt) {
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
+ }
+
+ int functionalWrite(Addr addr, Packet *pkt) {
+ int num_functional_writes := 0;
+
+ TBE tbe := TBEs.lookup(addr);
+ if(is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
+
+ num_functional_writes := num_functional_writes +
+ functionalMemoryWrite(pkt);
+ return num_functional_writes;
+ }
+
+ AccessPermission getAccessPermission(Addr addr) {
+ // For this Directory, all permissions are just tracked in Directory, since
+ // it's not possible to have something in TBE but not Dir, just keep track
+ // of state all in one place.
+ if (directory.isPresent(addr)) {
+ return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState);
+ }
+
+ return AccessPermission:NotPresent;
+ }
+
+ void setAccessPermission(CacheEntry entry, Addr addr, State state) {
+ getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state));
+ }
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:L3DataArrayRead) {
+ L3CacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+ } else if (request_type == RequestType:L3DataArrayWrite) {
+ L3CacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+ } else if (request_type == RequestType:L3TagArrayRead) {
+ L3CacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:L3TagArrayWrite) {
+ L3CacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ } else if (request_type == RequestType:PFTagArrayRead) {
+ ProbeFilterMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+ } else if (request_type == RequestType:PFTagArrayWrite) {
+ ProbeFilterMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+ }
+ }
+
+ bool checkResourceAvailable(RequestType request_type, Addr addr) {
+ if (request_type == RequestType:L3DataArrayRead) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L3DataArrayWrite) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+ } else if (request_type == RequestType:L3TagArrayRead) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:L3TagArrayWrite) {
+ return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:PFTagArrayRead) {
+ return ProbeFilterMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else if (request_type == RequestType:PFTagArrayWrite) {
+ return ProbeFilterMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+ } else {
+ error("Invalid RequestType type in checkResourceAvailable");
+ return true;
+ }
+ }
+
+ bool isNotPresentProbeFilter(Addr address) {
+ if (ProbeFilterMemory.isTagPresent(address) ||
+ ProbeFilterMemory.cacheAvail(address)) {
+ return false;
+ }
+ return true;
+ }
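+
+ // True only when the tag is absent and no way is free, i.e. a victim
+ // probe filter entry must be evicted (the PF_Repl event) before this
+ // request can allocate one.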
+
+ bool isGPUSharer(Addr address) {
+ assert(ProbeFilterMemory.isTagPresent(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address));
+ if (entry.pfState == ProbeFilterState:NT) {
+ return true;
+ } else if (entry.isOnGPU){
+ return true;
+ }
+ return false;
+ }
+
+ bool isCPUSharer(Addr address) {
+ assert(ProbeFilterMemory.isTagPresent(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address));
+ if (entry.pfState == ProbeFilterState:NT) {
+ return true;
+ } else if (entry.isOnCPU){
+ return true;
+ }
+ return false;
+ }
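+
+ // In the NT (not tracked) state no sharer information is available, so
+ // the two helpers above conservatively report both complexes as sharers
+ // and probes are broadcast.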
+
+
+ // ** OUT_PORTS **
+ out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore);
+ out_port(responseNetwork_out, ResponseMsg, responseToCore);
+
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+ out_port(L3TriggerQueue_out, TriggerMsg, L3triggerQueue);
+
+ // ** IN_PORTS **
+
+ // Trigger Queue
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=5) {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == TriggerType:AcksComplete) {
+ trigger(Event:ProbeAcksComplete, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == TriggerType:UnblockWriteThrough) {
+ trigger(Event:UnblockWriteThrough, in_msg.addr, entry, tbe);
+ } else {
+ error("Unknown trigger msg");
+ }
+ }
+ }
+ }
+
+ in_port(L3TriggerQueue_in, TriggerMsg, L3triggerQueue, rank=4) {
+ if (L3TriggerQueue_in.isReady(clockEdge())) {
+ peek(L3TriggerQueue_in, TriggerMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == TriggerType:L3Hit) {
+ trigger(Event:L3Hit, in_msg.addr, entry, tbe);
+ } else {
+ error("Unknown trigger msg");
+ }
+ }
+ }
+ }
+
+ // Unblock Network
+ in_port(unblockNetwork_in, UnblockMsg, unblockFromCores, rank=3) {
+ if (unblockNetwork_in.isReady(clockEdge())) {
+ peek(unblockNetwork_in, UnblockMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ trigger(Event:CoreUnblock, in_msg.addr, entry, tbe);
+ }
+ }
+ }
+
+ // Core response network
+ in_port(responseNetwork_in, ResponseMsg, responseFromCores, rank=2) {
+ if (responseNetwork_in.isReady(clockEdge())) {
+ peek(responseNetwork_in, ResponseMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+ trigger(Event:CPUPrbResp, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceResponseType:CPUData) {
+ trigger(Event:CPUData, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
+ trigger(Event:StaleWB, in_msg.addr, entry, tbe);
+ } else {
+ error("Unexpected response type");
+ }
+ }
+ }
+ }
+
+ // off-chip memory request/response is done
+ in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=1) {
+ if (memQueue_in.isReady(clockEdge())) {
+ peek(memQueue_in, MemoryMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
+ trigger(Event:MemData, in_msg.addr, entry, tbe);
+ DPRINTF(RubySlicc, "%s\n", in_msg);
+ } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
+ trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them.
+ } else {
+ DPRINTF(RubySlicc, "%s\n", in_msg.Type);
+ error("Invalid message");
+ }
+ }
+ }
+ }
+
+ in_port(requestNetwork_in, CPURequestMsg, requestFromCores, rank=0) {
+ if (requestNetwork_in.isReady(clockEdge())) {
+ peek(requestNetwork_in, CPURequestMsg) {
+ TBE tbe := TBEs.lookup(in_msg.addr);
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+ if (inclusiveDir && isNotPresentProbeFilter(in_msg.addr)) {
+ Addr victim := ProbeFilterMemory.cacheProbe(in_msg.addr);
+ tbe := TBEs.lookup(victim);
+ entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(victim));
+ trigger(Event:PF_Repl, victim, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
+ trigger(Event:RdBlk, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+ trigger(Event:RdBlkS, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+ trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+ trigger(Event:Atomic, in_msg.addr, entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+ if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+ DPRINTF(RubySlicc, "Dropping VicDirty for address %s\n", in_msg.addr);
+ trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "Got VicDirty from %s on %s\n", in_msg.Requestor, in_msg.addr);
+ trigger(Event:VicDirty, in_msg.addr, entry, tbe);
+ }
+ } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+ if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+ DPRINTF(RubySlicc, "Dropping VicClean for address %s\n", in_msg.addr);
+ trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+ } else {
+ DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr);
+ trigger(Event:VicClean, in_msg.addr, entry, tbe);
+ }
+ } else {
+ error("Bad request message type");
+ }
+ }
+ }
+ }
+
+ // Actions
+ action(s_sendResponseS, "s", desc="send Shared response") {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Shared;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(es_sendResponseES, "es", desc="send Exclusive or Shared response") {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := tbe.Dirty;
+ if (tbe.Cached) {
+ out_msg.State := CoherenceState:Shared;
+ } else {
+ out_msg.State := CoherenceState:Exclusive;
+ }
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ // Write-throughs and atomics do not send an unblock ack back to the
+ // directory, so the directory has to generate a self-unblocking
+ // message. Additionally, a write-through does not require data
+ // in its response, so it is treated separately from
+ // write-backs and atomics
+ action(m_sendResponseM, "m", desc="send Modified response") {
+ if (tbe.wtData) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:UnblockWriteThrough;
+ }
+ } else {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ if (tbe.L3Hit) {
+ out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+ } else {
+ out_msg.Sender := machineID;
+ }
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.State := CoherenceState:Modified;
+ out_msg.CtoD := false;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ out_msg.OriginalResponder := tbe.LastSender;
+ if(tbe.atomicData){
+ out_msg.WTRequestor := tbe.WTRequestor;
+ }
+ out_msg.L3Hit := tbe.L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ if (tbe.atomicData) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:UnblockWriteThrough;
+ }
+ }
+ }
+ }
+
+ action(c_sendResponseCtoD, "c", desc="send CtoD Ack") {
+ enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysResp;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(tbe.OriginalRequestor);
+ out_msg.MessageSize := MessageSizeType:Response_Control;
+ out_msg.Dirty := false;
+ out_msg.State := CoherenceState:Modified;
+ out_msg.CtoD := true;
+ out_msg.InitialRequestTime := tbe.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ }
+
+ action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:NBSysWBAck;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.WTRequestor := in_msg.WTRequestor;
+ out_msg.Sender := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := curCycle();
+ out_msg.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(l_queueMemWBReq, "lq", desc="Write WB data to memory") {
+ peek(responseNetwork_in, ResponseMsg) {
+ queueMemoryWrite(machineID, address, to_memory_controller_latency,
+ in_msg.DataBlk);
+ }
+ }
+
+ action(l_queueMemRdReq, "lr", desc="Read data from memory") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:L3Hit;
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ }
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ tbe.DataBlk := entry.DataBlk;
+ tbe.LastSender := entry.LastSender;
+ tbe.L3Hit := true;
+ tbe.MemData := true;
+ L3CacheMemory.deallocate(address);
+ } else {
+ queueMemoryRead(machineID, address, to_memory_controller_latency);
+ }
+ }
+ }
+
+ action(dc_probeInvCoreData, "dc", desc="probe inv cores, return data") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ if(isCPUSharer(address)) {
+ out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket
+ }
+
+ // add relevant TCC node to list. This replaces all TCPs and SQCs
+ if(isGPUSharer(address)) {
+ if ((in_msg.Type == CoherenceRequestType:WriteThrough ||
+ in_msg.Type == CoherenceRequestType:Atomic) &&
+ in_msg.NoWriteConflict) {
+ // Don't include TCCs unless there was a write-CAB conflict in the TCC
+ } else if(noTCCdir) {
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ } else {
+ out_msg.Destination.add(map_Address_to_TCCdir(address));
+ }
+ }
+ out_msg.Destination.remove(in_msg.Requestor);
+ tbe.NumPendingAcks := out_msg.Destination.count();
+ if (tbe.NumPendingAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ }
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ APPEND_TRANSITION_COMMENT(" dc: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(bp_backProbe, "bp", desc="back probe") {
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ if(isCPUSharer(address)) {
+ // won't be realistic for multisocket
+ out_msg.Destination.broadcast(MachineType:CorePair);
+ }
+ // add relevant TCC node to the list. This replaces all TCPs and SQCs
+ if(isGPUSharer(address)) {
+ if (noTCCdir) {
+ //Don't need to notify TCC about reads
+ } else {
+ out_msg.Destination.add(map_Address_to_TCCdir(address));
+ tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+ }
+ if (noTCCdir && CAB_TCC) {
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ }
+ }
+ tbe.NumPendingAcks := out_msg.Destination.count();
+ if (tbe.NumPendingAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ }
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ APPEND_TRANSITION_COMMENT(" bp: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ APPEND_TRANSITION_COMMENT(" - back probe");
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+
+ action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") {
+ peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbDowngrade;
+ out_msg.ReturnData := true;
+ out_msg.MessageSize := MessageSizeType:Control;
+ if(isCPUSharer(address)) {
+ out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket
+ }
+ // add relevant TCC node to the list. This replaces all TCPs and SQCs
+ if(isGPUSharer(address)) {
+ if (noTCCdir) {
+ //Don't need to notify TCC about reads
+ } else {
+ out_msg.Destination.add(map_Address_to_TCCdir(address));
+ tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+ }
+ if (noTCCdir && CAB_TCC) {
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ }
+ }
+ out_msg.Destination.remove(in_msg.Requestor);
+ tbe.NumPendingAcks := out_msg.Destination.count();
+ if (tbe.NumPendingAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ }
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(ic_probeInvCore, "ic", desc="probe invalidate core, no return data needed") {
+ peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+ enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.Type := ProbeRequestType:PrbInv;
+ out_msg.ReturnData := false;
+ out_msg.MessageSize := MessageSizeType:Control;
+ if(isCPUSharer(address)) {
+ out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket
+ }
+
+ // add relevant TCC node to the list. This replaces all TCPs and SQCs
+ if(isGPUSharer(address)) {
+ if (noTCCdir) {
+ out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+ TCC_select_low_bit, TCC_select_num_bits));
+ } else {
+ out_msg.Destination.add(map_Address_to_TCCdir(address));
+ }
+ }
+ out_msg.Destination.remove(in_msg.Requestor);
+ tbe.NumPendingAcks := out_msg.Destination.count();
+ if (tbe.NumPendingAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ }
+ APPEND_TRANSITION_COMMENT(" ic: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ DPRINTF(RubySlicc, "%s\n", out_msg);
+ tbe.ProbeRequestStartTime := curCycle();
+ }
+ }
+ }
+
+ action(sm_setMRU, "sm", desc="set probe filter entry as MRU") {
+ ProbeFilterMemory.setMRU(address);
+ }
+
+ action(d_writeDataToMemory, "d", desc="Write data to memory") {
+ peek(responseNetwork_in, ResponseMsg) {
+ getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
+ DPRINTF(RubySlicc, "Writing Data: %s to address %s\n", in_msg.DataBlk,
+ in_msg.addr);
+ }
+ }
+
+ action(te_allocateTBEForEviction, "te", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ tbe.writeMask.clear();
+ tbe.wtData := false;
+ tbe.atomicData := false;
+ tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
+ tbe.Dirty := false;
+ tbe.NumPendingAcks := 0;
+ }
+
+ action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+ check_allocate(TBEs);
+ peek(requestNetwork_in, CPURequestMsg) {
+ TBEs.allocate(address);
+ set_tbe(TBEs.lookup(address));
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ tbe.writeMask.clear();
+ tbe.writeMask.orMask(in_msg.writeMask);
+ tbe.wtData := true;
+ tbe.WTRequestor := in_msg.WTRequestor;
+ tbe.LastSender := in_msg.Requestor;
+ }
+ if (in_msg.Type == CoherenceRequestType:Atomic) {
+ tbe.writeMask.clear();
+ tbe.writeMask.orMask(in_msg.writeMask);
+ tbe.atomicData := true;
+ tbe.WTRequestor := in_msg.WTRequestor;
+ tbe.LastSender := in_msg.Requestor;
+ }
+ tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
+ tbe.Dirty := false;
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ tbe.DataBlk.copyPartial(in_msg.DataBlk,tbe.writeMask);
+ tbe.Dirty := false;
+ }
+ tbe.OriginalRequestor := in_msg.Requestor;
+ tbe.NumPendingAcks := 0;
+ tbe.Cached := in_msg.ForceShared;
+ tbe.InitialRequestTime := in_msg.InitialRequestTime;
+ }
+ }
+
+ action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+ if (tbe.Dirty == false) {
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ }
+ TBEs.deallocate(address);
+ unset_tbe();
+ }
+
+ action(wd_writeBackData, "wd", desc="Write back data if needed") {
+ if (tbe.wtData) {
+ DataBlock tmp := getDirectoryEntry(address).DataBlk;
+ tmp.copyPartial(tbe.DataBlk,tbe.writeMask);
+ tbe.DataBlk := tmp;
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ } else if (tbe.atomicData) {
+ tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,
+ tbe.writeMask);
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ } else if (tbe.Dirty == false) {
+ getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+ }
+ }
+
+ action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") {
+ peek(memQueue_in, MemoryMsg) {
+ if (tbe.wtData == true) {
+ // Do nothing (already have the directory data)
+ } else if (tbe.Dirty == false) {
+ tbe.DataBlk := getDirectoryEntry(address).DataBlk;
+ }
+ tbe.MemData := true;
+ }
+ }
+
+ action(y_writeProbeDataToTBE, "y", desc="write Probe Data to TBE") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (in_msg.Dirty) {
+ DPRINTF(RubySlicc, "Got dirty data for %s from %s\n", address, in_msg.Sender);
+ DPRINTF(RubySlicc, "Data is %s\n", in_msg.DataBlk);
+ if (tbe.wtData) {
+ DataBlock tmp := in_msg.DataBlk;
+ tmp.copyPartial(tbe.DataBlk,tbe.writeMask);
+ tbe.DataBlk := tmp;
+ } else if (tbe.Dirty) {
+ if(tbe.atomicData == false && tbe.wtData == false) {
+ DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
+ assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data
+ }
+ } else {
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.Dirty := in_msg.Dirty;
+ tbe.LastSender := in_msg.Sender;
+ }
+ }
+ if (in_msg.Hit) {
+ tbe.Cached := true;
+ }
+ }
+ }
+
+ action(mwc_markSinkWriteCancel, "mwc", desc="Mark to sink impending VicDirty") {
+ peek(responseNetwork_in, ResponseMsg) {
+ DPRINTF(RubySlicc, "Write cancel bit set on address %s\n", address);
+ getDirectoryEntry(address).VicDirtyIgnore.add(in_msg.Sender);
+ APPEND_TRANSITION_COMMENT(" setting bit to sink VicDirty ");
+ }
+ }
+
+ action(x_decrementAcks, "x", desc="decrement Acks pending") {
+ tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+ APPEND_TRANSITION_COMMENT(" Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ }
+
+ action(o_checkForCompletion, "o", desc="check for ack completion") {
+ if (tbe.NumPendingAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg, 1) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:AcksComplete;
+ }
+ }
+ APPEND_TRANSITION_COMMENT(" Check: Acks remaining: ");
+ APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+ }
+
+ action(rv_removeVicDirtyIgnore, "rv", desc="Remove ignored core") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor);
+ }
+ }
+
+ action(al_allocateL3Block, "al", desc="allocate the L3 block on WB") {
+ peek(responseNetwork_in, ResponseMsg) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+ entry.DataBlk := in_msg.DataBlk;
+ entry.LastSender := in_msg.Sender;
+ } else {
+ if (L3CacheMemory.cacheAvail(address) == false) {
+ Addr victim := L3CacheMemory.cacheProbe(address);
+ CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+ L3CacheMemory.lookup(victim));
+ queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+ victim_entry.DataBlk);
+ L3CacheMemory.deallocate(victim);
+ }
+ assert(L3CacheMemory.cacheAvail(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+ entry.DataBlk := in_msg.DataBlk;
+
+ entry.LastSender := in_msg.Sender;
+ }
+ }
+ }
+
+ action(alwt_allocateL3BlockOnWT, "alwt", desc="allocate the L3 block on WT") {
+ if ((tbe.wtData || tbe.atomicData) && useL3OnWT) {
+ if (L3CacheMemory.isTagPresent(address)) {
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+ entry.DataBlk := tbe.DataBlk;
+ entry.LastSender := tbe.LastSender;
+ } else {
+ if (L3CacheMemory.cacheAvail(address) == false) {
+ Addr victim := L3CacheMemory.cacheProbe(address);
+ CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+ L3CacheMemory.lookup(victim));
+ queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+ victim_entry.DataBlk);
+ L3CacheMemory.deallocate(victim);
+ }
+ assert(L3CacheMemory.cacheAvail(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+ APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+ entry.DataBlk := tbe.DataBlk;
+ entry.LastSender := tbe.LastSender;
+ }
+ }
+ }
+
+ action(apf_allocateProbeFilterEntry, "apf", desc="Allocate probe filter entry") {
+ if (!ProbeFilterMemory.isTagPresent(address)) {
+ if (inclusiveDir) {
+ assert(ProbeFilterMemory.cacheAvail(address));
+ } else if (ProbeFilterMemory.cacheAvail(address) == false) {
+ Addr victim := ProbeFilterMemory.cacheProbe(address);
+ ProbeFilterMemory.deallocate(victim);
+ }
+ assert(ProbeFilterMemory.cacheAvail(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.allocate(address, new CacheEntry));
+ APPEND_TRANSITION_COMMENT(" allocating a new probe filter entry");
+ entry.pfState := ProbeFilterState:NT;
+ if (inclusiveDir) {
+ entry.pfState := ProbeFilterState:T;
+ }
+ entry.isOnCPU := false;
+ entry.isOnGPU := false;
+ }
+ }
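+
+ // In inclusive mode a free way is guaranteed here, because replacements
+ // are handled up front through PF_Repl; in non-inclusive mode a victim
+ // entry can simply be dropped, since the filter need not cover every
+ // cached block.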
+
+ action(mpfe_markPFEntryForEviction, "mpfe", desc="Mark this PF entry is being evicted") {
+ assert(ProbeFilterMemory.isTagPresent(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address));
+ entry.pfState := ProbeFilterState:B;
+ peek(requestNetwork_in, CPURequestMsg) {
+ tbe.demandAddress := in_msg.addr;
+ }
+ }
+
+ action(we_wakeUpEvictionDependents, "we", desc="Wake up requests waiting for demand address and victim address") {
+ wakeUpBuffers(address);
+ wakeUpBuffers(tbe.demandAddress);
+ }
+
+ action(dpf_deallocateProbeFilter, "dpf", desc="deallocate PF entry") {
+ assert(ProbeFilterMemory.isTagPresent(address));
+ ProbeFilterMemory.deallocate(address);
+ }
+
+ action(upf_updateProbeFilter, "upf", desc="update probe filter sharer info") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ assert(ProbeFilterMemory.isTagPresent(address));
+ CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address));
+ if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+ entry.pfState := ProbeFilterState:T;
+ entry.isOnCPU := false;
+ entry.isOnGPU := false;
+ } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+ entry.pfState := ProbeFilterState:T;
+ entry.isOnCPU := false;
+ entry.isOnGPU := false;
+ } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+ entry.pfState := ProbeFilterState:T;
+ entry.isOnCPU := false;
+ entry.isOnGPU := false;
+ } else if (in_msg.Type == CoherenceRequestType:CtoD) {
+ entry.pfState := ProbeFilterState:T;
+ entry.isOnCPU := false;
+ entry.isOnGPU := false;
+ }
+ if(machineIDToMachineType(in_msg.Requestor) == MachineType:CorePair) {
+ entry.isOnCPU := true;
+ } else {
+ entry.isOnGPU := true;
+ }
+ }
+ }
+
+ action(rmcd_removeSharerConditional, "rmcd", desc="remove sharer from probe Filter, conditional") {
+ peek(requestNetwork_in, CPURequestMsg) {
+ if (ProbeFilterMemory.isTagPresent(address)) {
+ CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address));
+ if(machineIDToMachineType(in_msg.Requestor) == MachineType:CorePair) {//CorePair has inclusive L2
+ if (in_msg.Type == CoherenceRequestType:VicDirty) {
+ entry.isOnCPU := false;
+ } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+ entry.isOnCPU := false;
+ }
+ }
+ }
+ }
+ }
+
+ action(sf_setForwardReqTime, "sf", desc="...") {
+ tbe.ForwardRequestTime := curCycle();
+ }
+
+ action(dl_deallocateL3, "dl", desc="deallocate the L3 block") {
+ L3CacheMemory.deallocate(address);
+ }
+
+ action(p_popRequestQueue, "p", desc="pop request queue") {
+ requestNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pr_popResponseQueue, "pr", desc="pop response queue") {
+ responseNetwork_in.dequeue(clockEdge());
+ }
+
+ action(pm_popMemQueue, "pm", desc="pop mem queue") {
+ memQueue_in.dequeue(clockEdge());
+ }
+
+ action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
+ triggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(ptl_popTriggerQueue, "ptl", desc="pop L3 trigger queue") {
+ L3TriggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(pu_popUnblockQueue, "pu", desc="pop unblock queue") {
+ unblockNetwork_in.dequeue(clockEdge());
+ }
+
+ action(zz_recycleRequestQueue, "zz", desc="recycle request queue") {
+ requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(yy_recycleResponseQueue, "yy", desc="recycle response queue") {
+ responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+ }
+
+ action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") {
+ stall_and_wait(requestNetwork_in, address);
+ }
+
+ action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") {
+ wakeUpBuffers(address);
+ }
+
+ action(wa_wakeUpAllDependents, "waa", desc="Wake up any requests waiting for this region") {
+ wakeUpAllBuffers();
+ }
+
+ action(z_stall, "z", desc="...") {
+ }
+
+ // TRANSITIONS
+ transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
+ st_stallAndWaitRequest;
+ }
+
+ // It may be possible to save multiple invalidations here!
+ transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, {Atomic, WriteThrough}) {
+ st_stallAndWaitRequest;
+ }
+
+
+ // transitions from U
+ transition(U, PF_Repl, B_P) {PFTagArrayRead, PFTagArrayWrite}{
+ te_allocateTBEForEviction;
+ apf_allocateProbeFilterEntry;
+ bp_backProbe;
+ sm_setMRU;
+ mpfe_markPFEntryForEviction;
+ }
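+
+ // The PF_Repl transition above occurs only in inclusive mode: evicting a
+ // probe filter entry back-probes every potential sharer, and B_P collects
+ // the acks before waking both the victim address and the stalled demand
+ // address.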
+
+ transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} {
+ t_allocateTBE;
+ apf_allocateProbeFilterEntry;
+ l_queueMemRdReq;
+ sc_probeShrCoreData;
+ sm_setMRU;
+ upf_updateProbeFilter;
+ p_popRequestQueue;
+ }
+
+ transition(U, WriteThrough, BM_PM) {L3TagArrayRead, L3TagArrayWrite, PFTagArrayRead, PFTagArrayWrite} {
+ t_allocateTBE;
+ apf_allocateProbeFilterEntry;
+ w_sendResponseWBAck;
+ l_queueMemRdReq;
+ dc_probeInvCoreData;
+ sm_setMRU;
+ upf_updateProbeFilter;
+ p_popRequestQueue;
+ }
+
+ transition(U, Atomic, BM_PM) {L3TagArrayRead, L3TagArrayWrite, PFTagArrayRead, PFTagArrayWrite} {
+ t_allocateTBE;
+ apf_allocateProbeFilterEntry;
+ l_queueMemRdReq;
+ dc_probeInvCoreData;
+ sm_setMRU;
+ upf_updateProbeFilter;
+ p_popRequestQueue;
+ }
+
+ transition(U, {RdBlkM}, BM_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} {
+ t_allocateTBE;
+ apf_allocateProbeFilterEntry;
+ l_queueMemRdReq;
+ dc_probeInvCoreData;
+ sm_setMRU;
+ upf_updateProbeFilter;
+ p_popRequestQueue;
+ }
+
+ transition(U, RdBlk, B_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite}{
+ t_allocateTBE;
+ apf_allocateProbeFilterEntry;
+ l_queueMemRdReq;
+ sc_probeShrCoreData;
+ sm_setMRU;
+ upf_updateProbeFilter;
+ p_popRequestQueue;
+ }
+
+ transition(U, CtoD, BP) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} {
+ t_allocateTBE;
+ apf_allocateProbeFilterEntry;
+ ic_probeInvCore;
+ sm_setMRU;
+ upf_updateProbeFilter;
+ p_popRequestQueue;
+ }
+
+ transition(U, VicDirty, BL) {L3TagArrayRead} {
+ t_allocateTBE;
+ w_sendResponseWBAck;
+ rmcd_removeSharerConditional;
+ p_popRequestQueue;
+ }
+
+ transition(U, VicClean, BL) {L3TagArrayRead} {
+ t_allocateTBE;
+ w_sendResponseWBAck;
+ rmcd_removeSharerConditional;
+ p_popRequestQueue;
+ }
+
+ transition(BL, {VicDirty, VicClean}) {
+ zz_recycleRequestQueue;
+ }
+
+ transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} {
+ d_writeDataToMemory;
+ al_allocateL3Block;
+ wa_wakeUpDependents;
+ dt_deallocateTBE;
+ //l_queueMemWBReq; // why need an ack? esp. with DRAMSim, just put it in queue no ack needed
+ pr_popResponseQueue;
+ }
+
+ transition(BL, StaleWB, U) {L3TagArrayWrite} {
+ dt_deallocateTBE;
+ wa_wakeUpAllDependents;
+ pr_popResponseQueue;
+ }
+
+ transition({B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P}, {VicDirty, VicClean}) {
+ z_stall;
+ }
+
+ transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, WBAck) {
+ pm_popMemQueue;
+ }
+
+ transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, PF_Repl) {
+ zz_recycleRequestQueue;
+ }
+
+ transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, StaleVicDirty) {
+ rv_removeVicDirtyIgnore;
+ w_sendResponseWBAck;
+ p_popRequestQueue;
+ }
+
+ transition({B}, CoreUnblock, U) {
+ wa_wakeUpDependents;
+ pu_popUnblockQueue;
+ }
+
+ transition(B, UnblockWriteThrough, U) {
+ wa_wakeUpDependents;
+ pt_popTriggerQueue;
+ }
+
+ transition(BS_PM, MemData, BS_Pm) {} {
+ mt_writeMemDataToTBE;
+ pm_popMemQueue;
+ }
+
+ transition(BM_PM, MemData, BM_Pm){} {
+ mt_writeMemDataToTBE;
+ pm_popMemQueue;
+ }
+
+ transition(B_PM, MemData, B_Pm){} {
+ mt_writeMemDataToTBE;
+ pm_popMemQueue;
+ }
+
+ transition(BS_PM, L3Hit, BS_Pm) {} {
+ ptl_popTriggerQueue;
+ }
+
+ transition(BM_PM, L3Hit, BM_Pm) {} {
+ ptl_popTriggerQueue;
+ }
+
+ transition(B_PM, L3Hit, B_Pm) {} {
+ ptl_popTriggerQueue;
+ }
+
+ transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ mt_writeMemDataToTBE;
+ s_sendResponseS;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pm_popMemQueue;
+ }
+
+ transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ mt_writeMemDataToTBE;
+ m_sendResponseM;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pm_popMemQueue;
+ }
+
+ transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+ mt_writeMemDataToTBE;
+ es_sendResponseES;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pm_popMemQueue;
+ }
+
+ transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+ s_sendResponseS;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ ptl_popTriggerQueue;
+ }
+
+ transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+ m_sendResponseM;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ ptl_popTriggerQueue;
+ }
+
+ transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+ es_sendResponseES;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ ptl_popTriggerQueue;
+ }
+
+ transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, BP}, CPUPrbResp) {
+ y_writeProbeDataToTBE;
+ x_decrementAcks;
+ o_checkForCompletion;
+ pr_popResponseQueue;
+ }
+
+ transition(BS_PM, ProbeAcksComplete, BS_M) {} {
+ sf_setForwardReqTime;
+ pt_popTriggerQueue;
+ }
+
+ transition(BM_PM, ProbeAcksComplete, BM_M) {} {
+ sf_setForwardReqTime;
+ pt_popTriggerQueue;
+ }
+
+ transition(B_PM, ProbeAcksComplete, B_M){} {
+ sf_setForwardReqTime;
+ pt_popTriggerQueue;
+ }
+
+ transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+ sf_setForwardReqTime;
+ s_sendResponseS;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+ sf_setForwardReqTime;
+ m_sendResponseM;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+ sf_setForwardReqTime;
+ es_sendResponseES;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(B_P, ProbeAcksComplete, U) {
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ we_wakeUpEvictionDependents;
+ dpf_deallocateProbeFilter;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+
+ transition(BP, ProbeAcksComplete, B){L3TagArrayWrite} {
+ sf_setForwardReqTime;
+ c_sendResponseCtoD;
+ wd_writeBackData;
+ alwt_allocateL3BlockOnWT;
+ dt_deallocateTBE;
+ pt_popTriggerQueue;
+ }
+}
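
The probe filter machinery above reduces to a small sharer-tracking policy:
an untracked (NT) entry forces conservative broadcast probes to both the CPU
and GPU complexes, a tracked (T) entry probes only the recorded sharer(s),
and exclusive-intent requests (RdBlkM, WriteThrough, Atomic, CtoD) reset the
sharer bits to the new owner. The standalone C++ model below illustrates that
policy under simplified assumptions; the names are hypothetical and this is
not gem5 code.

    #include <iostream>

    enum class PFState { T, NT, B };  // tracked / not tracked / being replaced

    struct PFEntry {
        PFState state = PFState::NT;
        bool onCPU = false;
        bool onGPU = false;
    };

    // Mirrors isCPUSharer()/isGPUSharer(): with no information (NT), both
    // complexes must be treated as potential sharers.
    bool mustProbeCPU(const PFEntry &e) { return e.state == PFState::NT || e.onCPU; }
    bool mustProbeGPU(const PFEntry &e) { return e.state == PFState::NT || e.onGPU; }

    // Mirrors upf_updateProbeFilter() for exclusive-intent requests: clear
    // the sharer bits, mark the entry tracked, then record the requestor.
    void updateOnExclusive(PFEntry &e, bool requestorIsCPU)
    {
        e.state = PFState::T;
        e.onCPU = false;
        e.onGPU = false;
        (requestorIsCPU ? e.onCPU : e.onGPU) = true;
    }

    int main()
    {
        PFEntry e;  // starts NT: probes go everywhere
        std::cout << mustProbeCPU(e) << mustProbeGPU(e) << "\n"; // 11
        updateOnExclusive(e, /*requestorIsCPU=*/false);          // GPU takes M
        std::cout << mustProbeCPU(e) << mustProbeGPU(e) << "\n"; // 01
    }
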
diff --git a/src/mem/protocol/MOESI_AMD_Base.slicc b/src/mem/protocol/MOESI_AMD_Base.slicc
new file mode 100644
index 000000000..b38145246
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base.slicc
@@ -0,0 +1,6 @@
+protocol "MOESI_AMD_Base";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-CorePair.sm";
+include "MOESI_AMD_Base-L3cache.sm";
+include "MOESI_AMD_Base-dir.sm";
diff --git a/src/mem/protocol/RubySlicc_ComponentMapping.sm b/src/mem/protocol/RubySlicc_ComponentMapping.sm
index a72492b42..e1d7c4399 100644
--- a/src/mem/protocol/RubySlicc_ComponentMapping.sm
+++ b/src/mem/protocol/RubySlicc_ComponentMapping.sm
@@ -37,7 +37,10 @@ MachineID mapAddressToRange(Addr addr, MachineType type,
NetDest broadcast(MachineType type);
MachineID map_Address_to_DMA(Addr addr);
MachineID map_Address_to_Directory(Addr addr);
+MachineID map_Address_to_RegionDir(Addr addr);
NodeID map_Address_to_DirectoryNode(Addr addr);
+MachineID map_Address_to_TCCdir(Addr addr);
+NodeID map_Address_to_TCCdirNode(Addr addr);
NodeID machineIDToNodeID(MachineID machID);
NodeID machineIDToVersion(MachineID machID);
MachineType machineIDToMachineType(MachineID machID);
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index 5ee26d65c..c743ebe28 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -62,7 +62,7 @@ bool testAndWrite(Addr addr, DataBlock datablk, Packet *pkt);
// AccessPermission
// The following five states define the access permission of all memory blocks.
-// These permissions have multiple uses. They coordinate locking and
+// These permissions have multiple uses. They coordinate locking and
// synchronization primitives, as well as enable functional accesses.
// One should not need to add any additional permission values and it is very
// risky to do so.
@@ -73,7 +73,7 @@ enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent")
Read_Write, desc="block is Read/Write";
// Possibly Invalid data
- // The maybe stale permission indicates that accordingly to the protocol,
+ // The maybe stale permission indicates that, according to the protocol,
// there is no guarantee the block contains valid data. However, functional
// writes should update the block because a dataless PUT request may
// revalidate the block's data.
@@ -227,6 +227,13 @@ enumeration(MachineType, desc="...", default="MachineType_NULL") {
Collector, desc="Collector Mach";
L1Cache_wCC, desc="L1 Cache Mach to track cache-to-cache transfer (used for miss latency profile)";
L2Cache_wCC, desc="L2 Cache Mach to track cache-to-cache transfer (used for miss latency profile)";
+ CorePair, desc="Cache Mach (2 cores, Private L1Ds, Shared L1I & L2)";
+ TCP, desc="GPU L1 Data Cache (Texture Cache per Pipe)";
+ TCC, desc="GPU L2 Shared Cache (Texture Cache per Channel)";
+ TCCdir, desc="Directory at the GPU L2 Cache (TCC)";
+ SQC, desc="GPU L1 Instr Cache (Sequencer Cache)";
+ RegionDir, desc="Region-granular directory";
+ RegionBuffer, desc="Region buffer for CPU and GPU";
NULL, desc="null mach type";
}
diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm
index a6c57e1b0..b8d284725 100644
--- a/src/mem/protocol/RubySlicc_Types.sm
+++ b/src/mem/protocol/RubySlicc_Types.sm
@@ -31,8 +31,8 @@
//
// **PLEASE NOTE!** When adding objects to this file you must also add a line
-// in the src/mem/ruby/SConscript file. Otherwise the external object's .hh
-// file will not be copied to the protocol directory and you will encounter a
+// in the src/mem/ruby/SConscript file. Otherwise the external object's .hh
+// file will not be copied to the protocol directory and you will encounter an
// undefined declaration error.
//
@@ -95,6 +95,8 @@ structure (NetDest, external = "yes", non_obj="yes") {
bool intersectionIsEmpty(Set);
bool intersectionIsEmpty(NetDest);
MachineID smallestElement(MachineType);
+ NetDest OR(NetDest);
+ NetDest AND(NetDest);
}
structure (Sequencer, external = "yes") {
@@ -117,6 +119,44 @@ structure (Sequencer, external = "yes") {
void invalidateSC(Addr);
}
+structure (GPUCoalescer, external = "yes") {
+ void readCallback(Addr, DataBlock);
+ void readCallback(Addr, MachineType, DataBlock);
+ void readCallback(Addr, MachineType, DataBlock,
+ Cycles, Cycles, Cycles);
+ void readCallback(Addr, MachineType, DataBlock,
+ Cycles, Cycles, Cycles, bool);
+ void writeCallback(Addr, DataBlock);
+ void writeCallback(Addr, MachineType, DataBlock);
+ void writeCallback(Addr, MachineType, DataBlock,
+ Cycles, Cycles, Cycles);
+ void writeCallback(Addr, MachineType, DataBlock,
+ Cycles, Cycles, Cycles, bool);
+ void checkCoherence(Addr);
+ void evictionCallback(Addr);
+ void recordCPReadCallBack(MachineID, MachineID);
+ void recordCPWriteCallBack(MachineID, MachineID);
+}
+
+structure (VIPERCoalescer, external = "yes") {
+ void readCallback(Addr, DataBlock);
+ void readCallback(Addr, MachineType, DataBlock);
+ void readCallback(Addr, MachineType, DataBlock,
+ Cycles, Cycles, Cycles);
+ void readCallback(Addr, MachineType, DataBlock,
+ Cycles, Cycles, Cycles, bool);
+ void writeCallback(Addr, DataBlock);
+ void writeCallback(Addr, MachineType, DataBlock);
+ void writeCallback(Addr, MachineType, DataBlock,
+ Cycles, Cycles, Cycles);
+ void writeCallback(Addr, MachineType, DataBlock,
+ Cycles, Cycles, Cycles, bool);
+ void invCallback(Addr);
+ void wbCallback(Addr);
+ void checkCoherence(Addr);
+ void evictionCallback(Addr);
+}
+
structure(RubyRequest, desc="...", interface="Message", external="yes") {
Addr LineAddress, desc="Line address for this request";
Addr PhysicalAddress, desc="Physical address for this request";
@@ -161,6 +201,7 @@ structure (CacheMemory, external = "yes") {
Cycles getTagLatency();
Cycles getDataLatency();
void setMRU(Addr);
+ void setMRU(Addr, int);
void setMRU(AbstractCacheEntry);
void recordRequestType(CacheRequestType, Addr);
bool checkResourceAvailable(CacheResourceType, Addr);
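The new NetDest OR and AND operations are, going by their names and the existing intersectionIsEmpty helpers, set union and set intersection over destination machines. A toy model, assuming a plain bitmask representation (the real NetDest keeps one Set per MachineType):

#include <bitset>

// Toy NetDest: one bit per possible destination machine.
struct ToyNetDest {
    std::bitset<64> dests;

    ToyNetDest OR(const ToyNetDest &o) const   // union of destinations
    { return ToyNetDest{dests | o.dests}; }

    ToyNetDest AND(const ToyNetDest &o) const  // common destinations
    { return ToyNetDest{dests & o.dests}; }

    bool intersectionIsEmpty(const ToyNetDest &o) const
    { return (dests & o.dests).none(); }
};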
diff --git a/src/mem/protocol/SConsopts b/src/mem/protocol/SConsopts
index ca432a73e..47b36e276 100644
--- a/src/mem/protocol/SConsopts
+++ b/src/mem/protocol/SConsopts
@@ -33,6 +33,11 @@ import os
Import('*')
all_protocols.extend([
+ 'GPU_VIPER',
+ 'GPU_VIPER_Baseline',
+ 'GPU_VIPER_Region',
+ 'GPU_RfO',
+ 'MOESI_AMD_Base',
'MESI_Two_Level',
'MESI_Three_Level',
'MI_example',
diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript
index 16e932432..82a16c9b0 100644
--- a/src/mem/ruby/SConscript
+++ b/src/mem/ruby/SConscript
@@ -124,13 +124,20 @@ MakeInclude('common/Set.hh')
MakeInclude('common/WriteMask.hh')
MakeInclude('filters/AbstractBloomFilter.hh')
MakeInclude('network/MessageBuffer.hh')
-MakeInclude('structures/Prefetcher.hh')
MakeInclude('structures/CacheMemory.hh')
-MakeInclude('system/DMASequencer.hh')
MakeInclude('structures/DirectoryMemory.hh')
-MakeInclude('structures/WireBuffer.hh')
MakeInclude('structures/PerfectCacheMemory.hh')
MakeInclude('structures/PersistentTable.hh')
-MakeInclude('system/Sequencer.hh')
+MakeInclude('structures/Prefetcher.hh')
MakeInclude('structures/TBETable.hh')
MakeInclude('structures/TimerTable.hh')
+MakeInclude('structures/WireBuffer.hh')
+MakeInclude('system/DMASequencer.hh')
+MakeInclude('system/Sequencer.hh')
+
+# External types: if a header is referenced anywhere as
+# #include "mem/protocol/header.hh", add a MakeInclude for it to the
+# bottom of this list (the generated directory is Dir('../protocol')).
+MakeInclude('system/GPUCoalescer.hh')
+MakeInclude('system/VIPERCoalescer.hh')
diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc
index b3b37e5a6..7d3f20982 100644
--- a/src/mem/ruby/profiler/Profiler.cc
+++ b/src/mem/ruby/profiler/Profiler.cc
@@ -269,7 +269,7 @@ Profiler::collateStats()
it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
AbstractController *ctr = (*it).second;
- Sequencer *seq = ctr->getSequencer();
+ Sequencer *seq = ctr->getCPUSequencer();
if (seq != NULL) {
m_outstandReqHist.add(seq->getOutstandReqHist());
}
@@ -282,7 +282,7 @@ Profiler::collateStats()
it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
AbstractController *ctr = (*it).second;
- Sequencer *seq = ctr->getSequencer();
+ Sequencer *seq = ctr->getCPUSequencer();
if (seq != NULL) {
// add all the latencies
m_latencyHist.add(seq->getLatencyHist());
diff --git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh
index 926556781..cbd068c04 100644
--- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh
+++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh
@@ -56,6 +56,12 @@ class AbstractCacheEntry : public AbstractEntry
virtual DataBlock& getDataBlk()
{ panic("getDataBlk() not implemented!"); }
+ int validBlocks;
+ virtual int& getNumValidBlocks()
+ {
+ return validBlocks;
+ }
+
// Functions for locking and unlocking the cache entry. These are required
// for supporting atomic memory accesses.
void setLocked(int context);
diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc
index 93fe50c88..458fde5bc 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.cc
+++ b/src/mem/ruby/slicc_interface/AbstractController.cc
@@ -200,6 +200,12 @@ AbstractController::unblock(Addr addr)
}
}
+bool
+AbstractController::isBlocked(Addr addr)
+{
+ return (m_block_map.count(addr) > 0);
+}
+
BaseMasterPort &
AbstractController::getMasterPort(const std::string &if_name,
PortID idx)
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index 383507eed..4488ee3f4 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -73,6 +73,7 @@ class AbstractController : public MemObject, public Consumer
// return instance name
void blockOnQueue(Addr, MessageBuffer*);
void unblock(Addr);
+ bool isBlocked(Addr);
virtual MessageBuffer* getMandatoryQueue() const = 0;
virtual MessageBuffer* getMemoryQueue() const = 0;
@@ -84,7 +85,7 @@ class AbstractController : public MemObject, public Consumer
virtual void regStats();
virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0;
- virtual Sequencer* getSequencer() const = 0;
+ virtual Sequencer* getCPUSequencer() const = 0;
//! These functions are used by ruby system to read/write the data blocks
//! that exist with in the controller.
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
index 46071335e..cdedc2e14 100644
--- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
+++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
@@ -43,6 +43,12 @@ map_Address_to_DirectoryNode(Addr addr)
return DirectoryMemory::mapAddressToDirectoryVersion(addr);
}
+inline NodeID
+map_Address_to_TCCdirNode(Addr addr)
+{
+ return DirectoryMemory::mapAddressToDirectoryVersion(addr);
+}
+
// used to determine the home directory
// returns a value between 0 and total_directories_within_the_system
inline MachineID
@@ -53,6 +59,22 @@ map_Address_to_Directory(Addr addr)
return mach;
}
+inline MachineID
+map_Address_to_RegionDir(Addr addr)
+{
+ MachineID mach = {MachineType_RegionDir,
+ map_Address_to_DirectoryNode(addr)};
+ return mach;
+}
+
+inline MachineID
+map_Address_to_TCCdir(Addr addr)
+{
+ MachineID mach =
+ {MachineType_TCCdir, map_Address_to_TCCdirNode(addr)};
+ return mach;
+}
+
inline NetDest
broadcast(MachineType type)
{
@@ -102,4 +124,11 @@ createMachineID(MachineType type, NodeID id)
return mach;
}
+inline MachineID
+MachineTypeAndNodeIDToMachineID(MachineType type, NodeID node)
+{
+ MachineID mach = {type, node};
+ return mach;
+}
+
#endif // __MEM_RUBY_SLICC_INTERFACE_COMPONENTMAPPINGS_HH__
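Taken together, the new helpers let SLICC code address the region directory and the TCC directory that own a given line. A usage fragment (not standalone; it relies on the declarations above, and addr is any line address):

// Both directories are indexed by the same address-to-node mapping.
MachineID regionDir = map_Address_to_RegionDir(addr); // {RegionDir, node}
MachineID tccDir    = map_Address_to_TCCdir(addr);    // {TCCdir, node}

// Equivalent construction by hand via the generic helper:
MachineID sameTccDir =
    MachineTypeAndNodeIDToMachineID(MachineType_TCCdir,
                                    map_Address_to_TCCdirNode(addr));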
diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc
index a8a3ba949..45fb85d05 100644
--- a/src/mem/ruby/structures/CacheMemory.cc
+++ b/src/mem/ruby/structures/CacheMemory.cc
@@ -35,6 +35,7 @@
#include "mem/protocol/AccessPermission.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/RubySystem.hh"
+#include "mem/ruby/system/WeightedLRUPolicy.hh"
using namespace std;
@@ -66,29 +67,27 @@ CacheMemory::CacheMemory(const Params *p)
m_start_index_bit = p->start_index_bit;
m_is_instruction_only_cache = p->is_icache;
m_resource_stalls = p->resourceStalls;
+ m_block_size = p->block_size; // may be 0 at this point. Updated in init()
}
void
CacheMemory::init()
{
- m_cache_num_sets = (m_cache_size / m_cache_assoc) /
- RubySystem::getBlockSizeBytes();
+ if (m_block_size == 0) {
+ m_block_size = RubySystem::getBlockSizeBytes();
+ }
+ m_cache_num_sets = (m_cache_size / m_cache_assoc) / m_block_size;
assert(m_cache_num_sets > 1);
m_cache_num_set_bits = floorLog2(m_cache_num_sets);
assert(m_cache_num_set_bits > 0);
- m_cache.resize(m_cache_num_sets);
- for (int i = 0; i < m_cache_num_sets; i++) {
- m_cache[i].resize(m_cache_assoc);
- for (int j = 0; j < m_cache_assoc; j++) {
- m_cache[i][j] = NULL;
- }
- }
+ m_cache.resize(m_cache_num_sets,
+ std::vector<AbstractCacheEntry*>(m_cache_assoc, nullptr));
}
CacheMemory::~CacheMemory()
{
- if (m_replacementPolicy_ptr != NULL)
+ if (m_replacementPolicy_ptr)
delete m_replacementPolicy_ptr;
for (int i = 0; i < m_cache_num_sets; i++) {
for (int j = 0; j < m_cache_assoc; j++) {
@@ -359,6 +358,37 @@ CacheMemory::setMRU(const AbstractCacheEntry *e)
}
void
+CacheMemory::setMRU(Addr address, int occupancy)
+{
+ int64_t cacheSet = addressToCacheSet(address);
+ int loc = findTagInSet(cacheSet, address);
+
+ if (loc != -1) {
+ if (m_replacementPolicy_ptr->useOccupancy()) {
+ (static_cast<WeightedLRUPolicy*>(m_replacementPolicy_ptr))->
+ touch(cacheSet, loc, curTick(), occupancy);
+ } else {
+ m_replacementPolicy_ptr->
+ touch(cacheSet, loc, curTick());
+ }
+ }
+}
+
+int
+CacheMemory::getReplacementWeight(int64_t set, int64_t loc)
+{
+ assert(set < m_cache_num_sets);
+ assert(loc < m_cache_assoc);
+ int ret = 0;
+ if (m_cache[set][loc] != NULL) {
+ ret = m_cache[set][loc]->getNumValidBlocks();
+ assert(ret >= 0);
+ }
+
+ return ret;
+}
+
+void
CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const
{
uint64_t warmedUpBlocks = 0;
diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh
index 72805b32b..5b30505d3 100644
--- a/src/mem/ruby/structures/CacheMemory.hh
+++ b/src/mem/ruby/structures/CacheMemory.hh
@@ -106,7 +106,8 @@ class CacheMemory : public SimObject
// Set this address to most recently used
void setMRU(Addr address);
- // Set this entry to most recently used
+ void setMRU(Addr addr, int occupancy);
+ int getReplacementWeight(int64_t set, int64_t loc);
void setMRU(const AbstractCacheEntry *e);
// Functions for locking and unlocking cache lines corresponding to the
@@ -146,6 +147,7 @@ class CacheMemory : public SimObject
Stats::Scalar numDataArrayStalls;
int getCacheSize() const { return m_cache_size; }
+ int getCacheAssoc() const { return m_cache_assoc; }
int getNumBlocks() const { return m_cache_num_sets * m_cache_assoc; }
Addr getAddressAtIdx(int idx) const;
@@ -182,6 +184,7 @@ class CacheMemory : public SimObject
int m_cache_assoc;
int m_start_index_bit;
bool m_resource_stalls;
+ int m_block_size;
};
std::ostream& operator<<(std::ostream& out, const CacheMemory& obj);
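setMRU(Addr, int) and getReplacementWeight give the replacement policy a per-way occupancy (the entry's getNumValidBlocks count) on top of the usual recency timestamp. A hypothetical victim-selection sketch consistent with that interface; the actual WeightedLRUPolicy may weigh the two factors differently:

#include <cstdint>
#include <vector>

struct Way { uint64_t lastTouchTick; int weight; }; // weight = valid blocks

// Prefer evicting the way holding the fewest valid blocks, breaking
// ties by age (oldest lastTouchTick). Purely illustrative.
int pickVictim(const std::vector<Way> &set)
{
    int victim = 0;
    for (int i = 1; i < static_cast<int>(set.size()); ++i) {
        const Way &w = set[i], &v = set[victim];
        if (w.weight < v.weight ||
            (w.weight == v.weight && w.lastTouchTick < v.lastTouchTick))
            victim = i;
    }
    return victim;
}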
diff --git a/src/mem/ruby/structures/RubyCache.py b/src/mem/ruby/structures/RubyCache.py
index 4eb87ac74..9fc4726b0 100644
--- a/src/mem/ruby/structures/RubyCache.py
+++ b/src/mem/ruby/structures/RubyCache.py
@@ -42,6 +42,7 @@ class RubyCache(SimObject):
"")
start_index_bit = Param.Int(6, "index start, default 6 for 64-byte line");
is_icache = Param.Bool(False, "is instruction only cache");
+ block_size = Param.MemorySize("0B", "block size in bytes. 0 means default RubyBlockSize")
dataArrayBanks = Param.Int(1, "Number of banks for the data array")
tagArrayBanks = Param.Int(1, "Number of banks for the tag array")
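With block_size left at its 0B default, init() (shown above) falls back to RubySystem::getBlockSizeBytes(), so existing configurations are unaffected; a structure that tracks coarser-grained state, such as a region buffer, can presumably override it to size its sets by region rather than by cache line.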
diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc
new file mode 100644
index 000000000..db279bd3a
--- /dev/null
+++ b/src/mem/ruby/system/GPUCoalescer.cc
@@ -0,0 +1,1397 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#include "base/misc.hh"
+#include "base/str.hh"
+#include "config/the_isa.hh"
+
+#if THE_ISA == X86_ISA
+#include "arch/x86/insts/microldstop.hh"
+#endif // X86_ISA
+#include "mem/ruby/system/GPUCoalescer.hh"
+
+#include "cpu/testers/rubytest/RubyTester.hh"
+#include "debug/GPUCoalescer.hh"
+#include "debug/MemoryAccess.hh"
+#include "debug/ProtocolTrace.hh"
+#include "debug/RubyPort.hh"
+#include "debug/RubyStats.hh"
+#include "gpu-compute/shader.hh"
+#include "mem/packet.hh"
+#include "mem/ruby/common/DataBlock.hh"
+#include "mem/ruby/common/SubBlock.hh"
+#include "mem/ruby/network/MessageBuffer.hh"
+#include "mem/ruby/profiler/Profiler.hh"
+#include "mem/ruby/slicc_interface/AbstractController.hh"
+#include "mem/ruby/slicc_interface/RubyRequest.hh"
+#include "mem/ruby/structures/CacheMemory.hh"
+#include "mem/ruby/system/RubySystem.hh"
+#include "params/RubyGPUCoalescer.hh"
+
+using namespace std;
+
+GPUCoalescer *
+RubyGPUCoalescerParams::create()
+{
+ return new GPUCoalescer(this);
+}
+
+HSAScope
+reqScopeToHSAScope(Request* req)
+{
+ HSAScope accessScope = HSAScope_UNSPECIFIED;
+ if (req->isScoped()) {
+ if (req->isWavefrontScope()) {
+ accessScope = HSAScope_WAVEFRONT;
+ } else if (req->isWorkgroupScope()) {
+ accessScope = HSAScope_WORKGROUP;
+ } else if (req->isDeviceScope()) {
+ accessScope = HSAScope_DEVICE;
+ } else if (req->isSystemScope()) {
+ accessScope = HSAScope_SYSTEM;
+ } else {
+ fatal("Bad scope type");
+ }
+ }
+ return accessScope;
+}
+
+HSASegment
+reqSegmentToHSASegment(Request* req)
+{
+ HSASegment accessSegment = HSASegment_GLOBAL;
+
+ if (req->isGlobalSegment()) {
+ accessSegment = HSASegment_GLOBAL;
+ } else if (req->isGroupSegment()) {
+ accessSegment = HSASegment_GROUP;
+ } else if (req->isPrivateSegment()) {
+ accessSegment = HSASegment_PRIVATE;
+ } else if (req->isKernargSegment()) {
+ accessSegment = HSASegment_KERNARG;
+ } else if (req->isReadonlySegment()) {
+ accessSegment = HSASegment_READONLY;
+ } else if (req->isSpillSegment()) {
+ accessSegment = HSASegment_SPILL;
+ } else if (req->isArgSegment()) {
+ accessSegment = HSASegment_ARG;
+ } else {
+ fatal("Bad segment type");
+ }
+
+ return accessSegment;
+}
+
+GPUCoalescer::GPUCoalescer(const Params *p)
+ : RubyPort(p), issueEvent(this), deadlockCheckEvent(this)
+{
+ m_store_waiting_on_load_cycles = 0;
+ m_store_waiting_on_store_cycles = 0;
+ m_load_waiting_on_store_cycles = 0;
+ m_load_waiting_on_load_cycles = 0;
+
+ m_outstanding_count = 0;
+
+ m_max_outstanding_requests = 0;
+ m_deadlock_threshold = 0;
+ m_instCache_ptr = nullptr;
+ m_dataCache_ptr = nullptr;
+
+ m_instCache_ptr = p->icache;
+ m_dataCache_ptr = p->dcache;
+ m_max_outstanding_requests = p->max_outstanding_requests;
+ m_deadlock_threshold = p->deadlock_threshold;
+
+ assert(m_max_outstanding_requests > 0);
+ assert(m_deadlock_threshold > 0);
+ assert(m_instCache_ptr);
+ assert(m_dataCache_ptr);
+
+ m_data_cache_hit_latency = p->dcache_hit_latency;
+
+ m_usingNetworkTester = p->using_network_tester;
+ assumingRfOCoherence = p->assume_rfo;
+}
+
+GPUCoalescer::~GPUCoalescer()
+{
+}
+
+void
+GPUCoalescer::wakeup()
+{
+ // Check for deadlock of any of the requests
+ Cycles current_time = curCycle();
+
+ // Check across all outstanding requests
+ int total_outstanding = 0;
+
+ RequestTable::iterator read = m_readRequestTable.begin();
+ RequestTable::iterator read_end = m_readRequestTable.end();
+ for (; read != read_end; ++read) {
+ GPUCoalescerRequest* request = read->second;
+ if (current_time - request->issue_time < m_deadlock_threshold)
+ continue;
+
+ panic("Possible Deadlock detected. Aborting!\n"
+ "version: %d request.paddr: 0x%x m_readRequestTable: %d "
+ "current time: %u issue_time: %d difference: %d\n", m_version,
+ request->pkt->getAddr(), m_readRequestTable.size(),
+ current_time * clockPeriod(), request->issue_time * clockPeriod(),
+ (current_time - request->issue_time)*clockPeriod());
+ }
+
+ RequestTable::iterator write = m_writeRequestTable.begin();
+ RequestTable::iterator write_end = m_writeRequestTable.end();
+ for (; write != write_end; ++write) {
+ GPUCoalescerRequest* request = write->second;
+ if (current_time - request->issue_time < m_deadlock_threshold)
+ continue;
+
+ panic("Possible Deadlock detected. Aborting!\n"
+ "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
+ "current time: %u issue_time: %d difference: %d\n", m_version,
+ request->pkt->getAddr(), m_writeRequestTable.size(),
+ current_time * clockPeriod(), request->issue_time * clockPeriod(),
+ (current_time - request->issue_time) * clockPeriod());
+ }
+
+ total_outstanding += m_writeRequestTable.size();
+ total_outstanding += m_readRequestTable.size();
+
+ assert(m_outstanding_count == total_outstanding);
+
+ if (m_outstanding_count > 0) {
+ // If there are still outstanding requests, keep checking
+ schedule(deadlockCheckEvent,
+ m_deadlock_threshold * clockPeriod() +
+ curTick());
+ }
+}
+
+void
+GPUCoalescer::resetStats()
+{
+ m_latencyHist.reset();
+ m_missLatencyHist.reset();
+ for (int i = 0; i < RubyRequestType_NUM; i++) {
+ m_typeLatencyHist[i]->reset();
+ m_missTypeLatencyHist[i]->reset();
+ for (int j = 0; j < MachineType_NUM; j++) {
+ m_missTypeMachLatencyHist[i][j]->reset();
+ }
+ }
+
+ for (int i = 0; i < MachineType_NUM; i++) {
+ m_missMachLatencyHist[i]->reset();
+
+ m_IssueToInitialDelayHist[i]->reset();
+ m_InitialToForwardDelayHist[i]->reset();
+ m_ForwardToFirstResponseDelayHist[i]->reset();
+ m_FirstResponseToCompletionDelayHist[i]->reset();
+ }
+}
+
+void
+GPUCoalescer::printProgress(ostream& out) const
+{
+}
+
+RequestStatus
+GPUCoalescer::getRequestStatus(PacketPtr pkt, RubyRequestType request_type)
+{
+ Addr line_addr = makeLineAddress(pkt->getAddr());
+
+ if (!m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge())) {
+ return RequestStatus_BufferFull;
+ }
+
+ if (m_controller->isBlocked(line_addr) &&
+ request_type != RubyRequestType_Locked_RMW_Write) {
+ return RequestStatus_Aliased;
+ }
+
+ if ((request_type == RubyRequestType_ST) ||
+ (request_type == RubyRequestType_ATOMIC) ||
+ (request_type == RubyRequestType_ATOMIC_RETURN) ||
+ (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
+ (request_type == RubyRequestType_RMW_Read) ||
+ (request_type == RubyRequestType_RMW_Write) ||
+ (request_type == RubyRequestType_Load_Linked) ||
+ (request_type == RubyRequestType_Store_Conditional) ||
+ (request_type == RubyRequestType_Locked_RMW_Read) ||
+ (request_type == RubyRequestType_Locked_RMW_Write) ||
+ (request_type == RubyRequestType_FLUSH)) {
+
+ // Check if there is any outstanding read request for the same
+ // cache line.
+ if (m_readRequestTable.count(line_addr) > 0) {
+ m_store_waiting_on_load_cycles++;
+ return RequestStatus_Aliased;
+ }
+
+ if (m_writeRequestTable.count(line_addr) > 0) {
+ // There is an outstanding write request for the cache line
+ m_store_waiting_on_store_cycles++;
+ return RequestStatus_Aliased;
+ }
+ } else {
+ // Check if there is any outstanding write request for the same
+ // cache line.
+ if (m_writeRequestTable.count(line_addr) > 0) {
+ m_load_waiting_on_store_cycles++;
+ return RequestStatus_Aliased;
+ }
+
+ if (m_readRequestTable.count(line_addr) > 0) {
+ // There is an outstanding read request for the cache line
+ m_load_waiting_on_load_cycles++;
+ return RequestStatus_Aliased;
+ }
+ }
+
+ return RequestStatus_Ready;
+}
+
+// sets the kernelEndList
+void
+GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
+{
+ // It is unclear whether this can actually happen, but be
+ // defensive here so that a duplicate entry cannot turn into
+ // a simulator hang later on.
+ DPRINTF(GPUCoalescer, "inserting wf: %d to kernelEndList\n", wavefront_id);
+ assert(kernelEndList.count(wavefront_id) == 0);
+
+ kernelEndList[wavefront_id] = pkt;
+ DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n",
+ kernelEndList.size());
+}
+
+
+// Insert the request on the correct request table. Return true if
+// the entry was already present.
+bool
+GPUCoalescer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
+{
+ assert(getRequestStatus(pkt, request_type) == RequestStatus_Ready ||
+ pkt->req->isLockedRMW() ||
+ !m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge()));
+
+ int total_outstanding M5_VAR_USED =
+ m_writeRequestTable.size() + m_readRequestTable.size();
+
+ assert(m_outstanding_count == total_outstanding);
+
+ // See if we should schedule a deadlock check
+ if (deadlockCheckEvent.scheduled() == false) {
+ schedule(deadlockCheckEvent,
+ m_deadlock_threshold * clockPeriod() + curTick());
+ }
+
+ Addr line_addr = makeLineAddress(pkt->getAddr());
+ if ((request_type == RubyRequestType_ST) ||
+ (request_type == RubyRequestType_ATOMIC) ||
+ (request_type == RubyRequestType_ATOMIC_RETURN) ||
+ (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
+ (request_type == RubyRequestType_RMW_Read) ||
+ (request_type == RubyRequestType_RMW_Write) ||
+ (request_type == RubyRequestType_Load_Linked) ||
+ (request_type == RubyRequestType_Store_Conditional) ||
+ (request_type == RubyRequestType_Locked_RMW_Read) ||
+ (request_type == RubyRequestType_Locked_RMW_Write) ||
+ (request_type == RubyRequestType_FLUSH)) {
+
+ pair<RequestTable::iterator, bool> r =
+ m_writeRequestTable.insert(RequestTable::value_type(line_addr,
+ (GPUCoalescerRequest*) NULL));
+ if (r.second) {
+ RequestTable::iterator i = r.first;
+ i->second = new GPUCoalescerRequest(pkt, request_type,
+ curCycle());
+ DPRINTF(GPUCoalescer,
+ "Inserting write request for paddr %#x for type %d\n",
+ pkt->req->getPaddr(), i->second->m_type);
+ m_outstanding_count++;
+ } else {
+ return true;
+ }
+ } else {
+ pair<RequestTable::iterator, bool> r =
+ m_readRequestTable.insert(RequestTable::value_type(line_addr,
+ (GPUCoalescerRequest*) NULL));
+
+ if (r.second) {
+ RequestTable::iterator i = r.first;
+ i->second = new GPUCoalescerRequest(pkt, request_type,
+ curCycle());
+ DPRINTF(GPUCoalescer,
+ "Inserting read request for paddr %#x for type %d\n",
+ pkt->req->getPaddr(), i->second->m_type);
+ m_outstanding_count++;
+ } else {
+ return true;
+ }
+ }
+
+ m_outstandReqHist.sample(m_outstanding_count);
+
+ total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size();
+ assert(m_outstanding_count == total_outstanding);
+
+ return false;
+}
+
+void
+GPUCoalescer::markRemoved()
+{
+ m_outstanding_count--;
+ assert(m_outstanding_count ==
+ m_writeRequestTable.size() + m_readRequestTable.size());
+}
+
+void
+GPUCoalescer::removeRequest(GPUCoalescerRequest* srequest)
+{
+ assert(m_outstanding_count ==
+ m_writeRequestTable.size() + m_readRequestTable.size());
+
+ Addr line_addr = makeLineAddress(srequest->pkt->getAddr());
+ if ((srequest->m_type == RubyRequestType_ST) ||
+ (srequest->m_type == RubyRequestType_RMW_Read) ||
+ (srequest->m_type == RubyRequestType_RMW_Write) ||
+ (srequest->m_type == RubyRequestType_Load_Linked) ||
+ (srequest->m_type == RubyRequestType_Store_Conditional) ||
+ (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
+ (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {
+ m_writeRequestTable.erase(line_addr);
+ } else {
+ m_readRequestTable.erase(line_addr);
+ }
+
+ markRemoved();
+}
+
+bool
+GPUCoalescer::handleLlsc(Addr address, GPUCoalescerRequest* request)
+{
+ //
+ // The success flag indicates whether the LLSC operation was successful.
+ // LL ops will always succeed, but SC may fail if the cache line is no
+ // longer locked.
+ //
+ bool success = true;
+ if (request->m_type == RubyRequestType_Store_Conditional) {
+ if (!m_dataCache_ptr->isLocked(address, m_version)) {
+ //
+ // For failed SC requests, indicate the failure to the cpu by
+ // setting the extra data to zero.
+ //
+ request->pkt->req->setExtraData(0);
+ success = false;
+ } else {
+ //
+ // For successful SC requests, indicate the success to the cpu by
+ // setting the extra data to one.
+ //
+ request->pkt->req->setExtraData(1);
+ }
+ //
+ // Independent of success, all SC operations must clear the lock
+ //
+ m_dataCache_ptr->clearLocked(address);
+ } else if (request->m_type == RubyRequestType_Load_Linked) {
+ //
+ // Note: To fully follow Alpha LLSC semantics, should the LL clear any
+ // previously locked cache lines?
+ //
+ m_dataCache_ptr->setLocked(address, m_version);
+ } else if ((m_dataCache_ptr->isTagPresent(address)) &&
+ (m_dataCache_ptr->isLocked(address, m_version))) {
+ //
+ // Normal writes should clear the locked address
+ //
+ m_dataCache_ptr->clearLocked(address);
+ }
+ return success;
+}
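As a concrete example of the flow above: if this sequencer performs an LL on line X, the line is locked for its m_version. If another write to X then clears the lock, the subsequent SC finds isLocked false, sets the packet's extra data to 0, and reports failure; had the lock survived, the SC would set extra data to 1 and succeed. Either way, the SC clears the lock on X.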
+
+void
+GPUCoalescer::writeCallback(Addr address, DataBlock& data)
+{
+ writeCallback(address, MachineType_NULL, data);
+}
+
+void
+GPUCoalescer::writeCallback(Addr address,
+ MachineType mach,
+ DataBlock& data)
+{
+ writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
+}
+
+void
+GPUCoalescer::writeCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime)
+{
+ writeCallback(address, mach, data,
+ initialRequestTime, forwardRequestTime, firstResponseTime,
+ false);
+}
+
+void
+GPUCoalescer::writeCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool isRegion)
+{
+ assert(address == makeLineAddress(address));
+
+ DPRINTF(GPUCoalescer, "write callback for address %#x\n", address);
+ assert(m_writeRequestTable.count(makeLineAddress(address)));
+
+ RequestTable::iterator i = m_writeRequestTable.find(address);
+ assert(i != m_writeRequestTable.end());
+ GPUCoalescerRequest* request = i->second;
+
+ m_writeRequestTable.erase(i);
+ markRemoved();
+
+ assert((request->m_type == RubyRequestType_ST) ||
+ (request->m_type == RubyRequestType_ATOMIC) ||
+ (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
+ (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
+ (request->m_type == RubyRequestType_RMW_Read) ||
+ (request->m_type == RubyRequestType_RMW_Write) ||
+ (request->m_type == RubyRequestType_Load_Linked) ||
+ (request->m_type == RubyRequestType_Store_Conditional) ||
+ (request->m_type == RubyRequestType_Locked_RMW_Read) ||
+ (request->m_type == RubyRequestType_Locked_RMW_Write) ||
+ (request->m_type == RubyRequestType_FLUSH));
+
+
+ //
+ // For Alpha, properly handle LL, SC, and write requests with respect to
+ // locked cache blocks.
+ //
+ // Not valid for the Network_test protocol.
+ //
+ bool success = true;
+ if (!m_usingNetworkTester)
+ success = handleLlsc(address, request);
+
+ if (request->m_type == RubyRequestType_Locked_RMW_Read) {
+ m_controller->blockOnQueue(address, m_mandatory_q_ptr);
+ } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
+ m_controller->unblock(address);
+ }
+
+ hitCallback(request, mach, data, success,
+ request->issue_time, forwardRequestTime, firstResponseTime,
+ isRegion);
+}
+
+void
+GPUCoalescer::readCallback(Addr address, DataBlock& data)
+{
+ readCallback(address, MachineType_NULL, data);
+}
+
+void
+GPUCoalescer::readCallback(Addr address,
+ MachineType mach,
+ DataBlock& data)
+{
+ readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
+}
+
+void
+GPUCoalescer::readCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime)
+{
+ readCallback(address, mach, data,
+ initialRequestTime, forwardRequestTime, firstResponseTime,
+ false);
+}
+
+void
+GPUCoalescer::readCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool isRegion)
+{
+ assert(address == makeLineAddress(address));
+ assert(m_readRequestTable.count(makeLineAddress(address)));
+
+ DPRINTF(GPUCoalescer, "read callback for address %#x\n", address);
+ RequestTable::iterator i = m_readRequestTable.find(address);
+ assert(i != m_readRequestTable.end());
+ GPUCoalescerRequest* request = i->second;
+
+ m_readRequestTable.erase(i);
+ markRemoved();
+
+ assert((request->m_type == RubyRequestType_LD) ||
+ (request->m_type == RubyRequestType_IFETCH));
+
+ hitCallback(request, mach, data, true,
+ request->issue_time, forwardRequestTime, firstResponseTime,
+ isRegion);
+}
+
+void
+GPUCoalescer::hitCallback(GPUCoalescerRequest* srequest,
+ MachineType mach,
+ DataBlock& data,
+ bool success,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool isRegion)
+{
+ PacketPtr pkt = srequest->pkt;
+ Addr request_address = pkt->getAddr();
+ Addr request_line_address = makeLineAddress(request_address);
+
+ RubyRequestType type = srequest->m_type;
+
+ // Set this cache entry to the most recently used
+ if (type == RubyRequestType_IFETCH) {
+ if (m_instCache_ptr->isTagPresent(request_line_address))
+ m_instCache_ptr->setMRU(request_line_address);
+ } else {
+ if (m_dataCache_ptr->isTagPresent(request_line_address))
+ m_dataCache_ptr->setMRU(request_line_address);
+ }
+
+ recordMissLatency(srequest, mach,
+ initialRequestTime,
+ forwardRequestTime,
+ firstResponseTime,
+ success, isRegion);
+ // update the data
+ //
+ // MUST DO THIS FOR EACH REQUEST IN THE COALESCER
+ int len = reqCoalescer[request_line_address].size();
+ std::vector<PacketPtr> mylist;
+ for (int i = 0; i < len; ++i) {
+ PacketPtr pkt = reqCoalescer[request_line_address][i].first;
+ assert(type ==
+ reqCoalescer[request_line_address][i].second[PrimaryType]);
+ request_address = pkt->getAddr();
+ request_line_address = makeLineAddress(pkt->getAddr());
+ if (pkt->getPtr<uint8_t>()) {
+ if ((type == RubyRequestType_LD) ||
+ (type == RubyRequestType_ATOMIC) ||
+ (type == RubyRequestType_ATOMIC_RETURN) ||
+ (type == RubyRequestType_IFETCH) ||
+ (type == RubyRequestType_RMW_Read) ||
+ (type == RubyRequestType_Locked_RMW_Read) ||
+ (type == RubyRequestType_Load_Linked)) {
+ memcpy(pkt->getPtr<uint8_t>(),
+ data.getData(getOffset(request_address),
+ pkt->getSize()),
+ pkt->getSize());
+ } else {
+ data.setData(pkt->getPtr<uint8_t>(),
+ getOffset(request_address), pkt->getSize());
+ }
+ } else {
+ DPRINTF(MemoryAccess,
+ "WARNING. Data not transfered from Ruby to M5 for type " \
+ "%s\n",
+ RubyRequestType_to_string(type));
+ }
+
+ // If using the RubyTester, update the RubyTester sender state's
+ // subBlock with the received data. The tester will later access
+ // this state.
+ // Note: RubyPort will access its sender state before the
+ // RubyTester.
+ if (m_usingRubyTester) {
+ RubyPort::SenderState *requestSenderState =
+ safe_cast<RubyPort::SenderState*>(pkt->senderState);
+ RubyTester::SenderState* testerSenderState =
+ safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
+ testerSenderState->subBlock.mergeFrom(data);
+ }
+
+ mylist.push_back(pkt);
+ }
+ delete srequest;
+ reqCoalescer.erase(request_line_address);
+ assert(!reqCoalescer.count(request_line_address));
+
+ completeHitCallback(mylist, len);
+}
+
+bool
+GPUCoalescer::empty() const
+{
+ return m_writeRequestTable.empty() && m_readRequestTable.empty();
+}
+
+// Analyzes the packet to see if this request can be coalesced.
+// If request can be coalesced, this request is added to the reqCoalescer table
+// and makeRequest returns RequestStatus_Issued;
+// If this is the first request to a cacheline, request is added to both
+// newRequests queue and to the reqCoalescer table; makeRequest
+// returns RequestStatus_Issued.
+// If there is a pending request to this cacheline and this request
+// can't be coalesced, RequestStatus_Aliased is returned and
+// the packet needs to be reissued.
+RequestStatus
+GPUCoalescer::makeRequest(PacketPtr pkt)
+{
+ // Check for GPU Barrier Kernel End or Kernel Begin
+ // Leave these to be handled by the child class
+ // Kernel End/Barrier = isFlush + isRelease
+ // Kernel Begin = isFlush + isAcquire
+ if (pkt->req->isKernel()) {
+ if (pkt->req->isAcquire()) {
+ // This is a Kernel Begin; leave handling to the
+ // virtual xCoalescer::makeRequest.
+ return RequestStatus_Issued;
+ } else if (pkt->req->isRelease()) {
+ // This is a Kernel End; leave handling to the
+ // virtual xCoalescer::makeRequest.
+ // If we got here, no virtual override of this function
+ // was called, so also schedule the callback here.
+ int wf_id = 0;
+ if (pkt->req->hasContextId()) {
+ wf_id = pkt->req->contextId();
+ }
+ insertKernel(wf_id, pkt);
+ newKernelEnds.push_back(wf_id);
+ if (!issueEvent.scheduled()) {
+ schedule(issueEvent, curTick());
+ }
+ return RequestStatus_Issued;
+ }
+ }
+
+ // If number of outstanding requests greater than the max allowed,
+ // return RequestStatus_BufferFull. This logic can be extended to
+ // support proper backpressure.
+ if (m_outstanding_count >= m_max_outstanding_requests) {
+ return RequestStatus_BufferFull;
+ }
+
+ RubyRequestType primary_type = RubyRequestType_NULL;
+ RubyRequestType secondary_type = RubyRequestType_NULL;
+
+ if (pkt->isLLSC()) {
+ //
+ // Alpha LL/SC instructions need to be handled carefully by the cache
+ // coherence protocol to ensure they follow the proper semantics. In
+ // particular, by identifying the operations as atomic, the protocol
+ // should understand that migratory sharing optimizations should not
+ // be performed (i.e. a load between the LL and SC should not steal
+ // away exclusive permission).
+ //
+ if (pkt->isWrite()) {
+ primary_type = RubyRequestType_Store_Conditional;
+ } else {
+ assert(pkt->isRead());
+ primary_type = RubyRequestType_Load_Linked;
+ }
+ secondary_type = RubyRequestType_ATOMIC;
+ } else if (pkt->req->isLockedRMW()) {
+ //
+ // x86 locked instructions are translated to store cache coherence
+ // requests because these requests should always be treated as read
+ // exclusive operations and should leverage any migratory sharing
+ // optimization built into the protocol.
+ //
+ if (pkt->isWrite()) {
+ primary_type = RubyRequestType_Locked_RMW_Write;
+ } else {
+ assert(pkt->isRead());
+ primary_type = RubyRequestType_Locked_RMW_Read;
+ }
+ secondary_type = RubyRequestType_ST;
+ } else if (pkt->isAtomicOp()) {
+ //
+ // GPU Atomic Operation
+ //
+ primary_type = RubyRequestType_ATOMIC;
+ secondary_type = RubyRequestType_ATOMIC;
+ } else {
+ if (pkt->isRead()) {
+ if (pkt->req->isInstFetch()) {
+ primary_type = secondary_type = RubyRequestType_IFETCH;
+ } else {
+#if THE_ISA == X86_ISA
+ uint32_t flags = pkt->req->getFlags();
+ bool storeCheck = flags &
+ (TheISA::StoreCheck << TheISA::FlagShift);
+#else
+ bool storeCheck = false;
+#endif // X86_ISA
+ if (storeCheck) {
+ primary_type = RubyRequestType_RMW_Read;
+ secondary_type = RubyRequestType_ST;
+ } else {
+ primary_type = secondary_type = RubyRequestType_LD;
+ }
+ }
+ } else if (pkt->isWrite()) {
+ //
+ // Note: M5 packets do not differentiate ST from RMW_Write
+ //
+ primary_type = secondary_type = RubyRequestType_ST;
+ } else if (pkt->isFlush()) {
+ primary_type = secondary_type = RubyRequestType_FLUSH;
+ } else if (pkt->req->isRelease() || pkt->req->isAcquire()) {
+ if (assumingRfOCoherence) {
+ // If we reached here, this request must be a memFence;
+ // since the protocol implements RfO, the coalescer can
+ // assume sequential consistency and schedule the
+ // callback immediately.
+ // Currently the code implements fence callbacks
+ // by reusing the mechanism for kernel completions.
+ // This should be fixed.
+ int wf_id = 0;
+ if (pkt->req->hasContextId()) {
+ wf_id = pkt->req->contextId();
+ }
+ insertKernel(wf_id, pkt);
+ newKernelEnds.push_back(wf_id);
+ if (!issueEvent.scheduled()) {
+ schedule(issueEvent, curTick());
+ }
+ return RequestStatus_Issued;
+ } else {
+ // If not RfO, return issued here and let the child coalescer
+ // take care of it.
+ return RequestStatus_Issued;
+ }
+ } else {
+ panic("Unsupported ruby packet type\n");
+ }
+ }
+
+ // Check if there is any pending request to this cache line from
+ // previous cycles.
+ // If there is a pending request, return aliased. Since coalescing
+ // across time is not permitted, aliased requests are not coalesced.
+ // If a request for this address has already been issued, we must block
+ RequestStatus status = getRequestStatus(pkt, primary_type);
+ if (status != RequestStatus_Ready)
+ return status;
+
+ Addr line_addr = makeLineAddress(pkt->getAddr());
+
+ // Check if this request can be coalesced with previous
+ // requests from this cycle.
+ if (!reqCoalescer.count(line_addr)) {
+ // This is the first access to this cache line.
+ // A new request to the memory subsystem has to be
+ // made in the next cycle for this cache line, so
+ // add this line addr to the "newRequests" queue
+ newRequests.push_back(line_addr);
+
+ // There was a request to this cache line in this cycle,
+ // let us see if we can coalesce this request with the previous
+ // requests from this cycle
+ } else if (primary_type !=
+ reqCoalescer[line_addr][0].second[PrimaryType]) {
+ // can't coalesce loads, stores and atomics!
+ return RequestStatus_Aliased;
+ } else if (pkt->req->isLockedRMW() ||
+ reqCoalescer[line_addr][0].first->req->isLockedRMW()) {
+ // can't coalesce locked accesses, but can coalesce atomics!
+ return RequestStatus_Aliased;
+ } else if (pkt->req->hasContextId() && pkt->req->isRelease() &&
+ pkt->req->contextId() !=
+ reqCoalescer[line_addr][0].first->req->contextId()) {
+ // can't coalesce releases from different wavefronts
+ return RequestStatus_Aliased;
+ }
+
+ // in addition to the packet, we need to save both request types
+ reqCoalescer[line_addr].push_back(
+ RequestDesc(pkt, std::vector<RubyRequestType>()) );
+ reqCoalescer[line_addr].back().second.push_back(primary_type);
+ reqCoalescer[line_addr].back().second.push_back(secondary_type);
+ if (!issueEvent.scheduled())
+ schedule(issueEvent, curTick());
+ // TODO: issue hardware prefetches here
+ return RequestStatus_Issued;
+}
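The aliasing checks above boil down to a small predicate over the first request already recorded for the line in this cycle. A condensed, self-contained restatement (the types are stand-ins; the authoritative logic is makeRequest itself):

// Stand-in for RubyRequestType; only identity comparisons matter here.
enum class ReqType { Load, Store, Atomic, Ifetch };

struct ReqInfo {
    ReqType primary;
    bool lockedRMW;
    bool isRelease;
    int contextId;   // wavefront id, relevant for release requests
};

// Mirrors the coalescing rules in makeRequest: same primary type,
// no locked RMWs, and releases only from the same wavefront.
bool canCoalesce(const ReqInfo &first, const ReqInfo &next)
{
    if (next.primary != first.primary)
        return false;                 // loads/stores/atomics don't mix
    if (next.lockedRMW || first.lockedRMW)
        return false;                 // locked accesses never coalesce
    if (next.isRelease && next.contextId != first.contextId)
        return false;                 // releases: same wavefront only
    return true;
}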
+
+void
+GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
+{
+ int proc_id = -1;
+ if (pkt != NULL && pkt->req->hasContextId()) {
+ proc_id = pkt->req->contextId();
+ }
+
+ // If valid, copy the pc to the ruby request
+ Addr pc = 0;
+ if (pkt->req->hasPC()) {
+ pc = pkt->req->getPC();
+ }
+
+ // At the moment setting scopes only counts for GPU spill
+ // space accesses (i.e. pkt->req->isStack()). That scope is
+ // REPLACE, since it does not need to be flushed at the end
+ // of a kernel; private and local accesses may need to be
+ // visible at the end of the kernel.
+ HSASegment accessSegment = reqSegmentToHSASegment(pkt->req);
+ HSAScope accessScope = reqScopeToHSAScope(pkt->req);
+
+ Addr line_addr = makeLineAddress(pkt->getAddr());
+
+ // Creating WriteMask that records written bytes
+ // and atomic operations. This enables partial writes
+ // and partial reads of those writes
+ DataBlock dataBlock;
+ dataBlock.clear();
+ uint32_t blockSize = RubySystem::getBlockSizeBytes();
+ std::vector<bool> accessMask(blockSize,false);
+ std::vector< std::pair<int,AtomicOpFunctor*> > atomicOps;
+ uint32_t tableSize = reqCoalescer[line_addr].size();
+ for (int i = 0; i < tableSize; i++) {
+ PacketPtr tmpPkt = reqCoalescer[line_addr][i].first;
+ uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
+ uint32_t tmpSize = tmpPkt->getSize();
+ if (tmpPkt->isAtomicOp()) {
+ std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
+ tmpPkt->getAtomicOp());
+ atomicOps.push_back(tmpAtomicOp);
+ } else if (tmpPkt->isWrite()) {
+ dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
+ tmpOffset, tmpSize);
+ }
+ for (int j = 0; j < tmpSize; j++) {
+ accessMask[tmpOffset + j] = true;
+ }
+ }
+ std::shared_ptr<RubyRequest> msg;
+ if (pkt->isAtomicOp()) {
+ msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
+ pkt->getPtr<uint8_t>(),
+ pkt->getSize(), pc, secondary_type,
+ RubyAccessMode_Supervisor, pkt,
+ PrefetchBit_No, proc_id, 100,
+ blockSize, accessMask,
+ dataBlock, atomicOps,
+ accessScope, accessSegment);
+ } else {
+ msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
+ pkt->getPtr<uint8_t>(),
+ pkt->getSize(), pc, secondary_type,
+ RubyAccessMode_Supervisor, pkt,
+ PrefetchBit_No, proc_id, 100,
+ blockSize, accessMask,
+ dataBlock,
+ accessScope, accessSegment);
+ }
+ DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
+ curTick(), m_version, "Coal", "Begin", "", "",
+ printAddress(msg->getPhysicalAddress()),
+ RubyRequestType_to_string(secondary_type));
+
+ fatal_if(secondary_type == RubyRequestType_IFETCH,
+ "there should not be any I-Fetch requests in the GPU Coalescer");
+
+ // Send the message to the cache controller
+ fatal_if(m_data_cache_hit_latency == 0,
+ "should not have a latency of zero");
+
+ assert(m_mandatory_q_ptr);
+ m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+}
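The accessMask built above is what lets several coalesced accesses travel in one RubyRequest: only bytes some packet actually touched are marked, so the protocol can perform partial writes (and read back partial results) on the line. A self-contained sketch of the same loop, with hypothetical offsets:

#include <utility>
#include <vector>

// Build a per-byte mask over a 64-byte line from (offset, size) pairs,
// mirroring the accessMask loop in issueRequest.
std::vector<bool> buildAccessMask(
    const std::vector<std::pair<int, int>> &accesses)
{
    std::vector<bool> mask(64, false);
    for (const auto &a : accesses)
        for (int j = 0; j < a.second; ++j)
            mask[a.first + j] = true;
    return mask;
}

// Two coalesced 4-byte stores at offsets 0 and 8:
// buildAccessMask({{0, 4}, {8, 4}}) marks bytes 0-3 and 8-11 only;
// bytes 4-7 stay false and are never written back.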
+
+template <class KEY, class VALUE>
+std::ostream &
+operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
+{
+ out << "[";
+ for (auto i = map.begin(); i != map.end(); ++i)
+ out << " " << i->first << "=" << i->second;
+ out << " ]";
+
+ return out;
+}
+
+void
+GPUCoalescer::print(ostream& out) const
+{
+ out << "[GPUCoalescer: " << m_version
+ << ", outstanding requests: " << m_outstanding_count
+ << ", read request table: " << m_readRequestTable
+ << ", write request table: " << m_writeRequestTable
+ << "]";
+}
+
+// This can be called from setState whenever coherence permissions are
+// upgraded; when invoked, coherence violations will be checked for the
+// given block.
+void
+GPUCoalescer::checkCoherence(Addr addr)
+{
+#ifdef CHECK_COHERENCE
+ m_ruby_system->checkGlobalCoherenceInvariant(addr);
+#endif
+}
+
+void
+GPUCoalescer::recordRequestType(SequencerRequestType requestType)
+{
+ DPRINTF(RubyStats, "Recorded statistic: %s\n",
+ SequencerRequestType_to_string(requestType));
+}
+
+GPUCoalescer::IssueEvent::IssueEvent(GPUCoalescer* _seq)
+ : Event(Progress_Event_Pri), seq(_seq)
+{
+}
+
+
+void
+GPUCoalescer::completeIssue()
+{
+ // newRequests has the cacheline addresses of all the
+ // requests which need to be issued to the memory subsystem
+ // in this cycle
+ int len = newRequests.size();
+ DPRINTF(GPUCoalescer, "Completing issue for %d new requests.\n", len);
+ for (int i = 0; i < len; ++i) {
+ // Get the requests from reqCoalescer table. Get only the
+ // first request for each cacheline; the remaining requests
+ // can be coalesced with the first request. So, only
+ // one request is issued per cacheline.
+ RequestDesc info = reqCoalescer[newRequests[i]][0];
+ PacketPtr pkt = info.first;
+ DPRINTF(GPUCoalescer, "Completing for newReq %d: paddr %#x\n",
+ i, pkt->req->getPaddr());
+ // Insert this request to the read/writeRequestTables. These tables
+ // are used to track aliased requests in makeRequest subroutine
+ bool found = insertRequest(pkt, info.second[PrimaryType]);
+
+ if (found) {
+ panic("GPUCoalescer::makeRequest should never be called if the "
+ "request is already outstanding\n");
+ }
+
+ // Issue request to ruby subsystem
+ issueRequest(pkt, info.second[SecondaryType]);
+ }
+ newRequests.clear();
+
+ // Have any Kernel End releases been issued this cycle?
+ len = newKernelEnds.size();
+ for (int i = 0; i < len; i++) {
+ kernelCallback(newKernelEnds[i]);
+ }
+ newKernelEnds.clear();
+}
+
+void
+GPUCoalescer::IssueEvent::process()
+{
+ seq->completeIssue();
+}
+
+const char *
+GPUCoalescer::IssueEvent::description() const
+{
+ return "Issue coalesced request";
+}
+
+void
+GPUCoalescer::evictionCallback(Addr address)
+{
+ ruby_eviction_callback(address);
+}
+
+void
+GPUCoalescer::kernelCallback(int wavefront_id)
+{
+ assert(kernelEndList.count(wavefront_id));
+
+ ruby_hit_callback(kernelEndList[wavefront_id]);
+
+ kernelEndList.erase(wavefront_id);
+}
+
+void
+GPUCoalescer::atomicCallback(Addr address,
+ MachineType mach,
+ const DataBlock& data)
+{
+ assert(address == makeLineAddress(address));
+
+ DPRINTF(GPUCoalescer, "atomic callback for address %#x\n", address);
+ assert(m_writeRequestTable.count(makeLineAddress(address)));
+
+ RequestTable::iterator i = m_writeRequestTable.find(address);
+ assert(i != m_writeRequestTable.end());
+ GPUCoalescerRequest* srequest = i->second;
+
+ m_writeRequestTable.erase(i);
+ markRemoved();
+
+ assert((srequest->m_type == RubyRequestType_ATOMIC) ||
+ (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
+ (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));
+
+
+ // Atomics don't write to cache, so there is no MRU update...
+
+ recordMissLatency(srequest, mach,
+ srequest->issue_time, Cycles(0), Cycles(0), true, false);
+
+ PacketPtr pkt = srequest->pkt;
+ Addr request_address = pkt->getAddr();
+ Addr request_line_address = makeLineAddress(pkt->getAddr());
+
+ int len = reqCoalescer[request_line_address].size();
+ std::vector<PacketPtr> mylist;
+ for (int i = 0; i < len; ++i) {
+ PacketPtr pkt = reqCoalescer[request_line_address][i].first;
+ assert(srequest->m_type ==
+ reqCoalescer[request_line_address][i].second[PrimaryType]);
+ request_address = (pkt->getAddr());
+ request_line_address = makeLineAddress(request_address);
+ if (pkt->getPtr<uint8_t>() &&
+ srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {
+ // Atomics are performed in memory and return the data
+ // *before* the atomic op was applied.
+ memcpy(pkt->getPtr<uint8_t>(),
+ data.getData(getOffset(request_address),
+ pkt->getSize()),
+ pkt->getSize());
+ } else {
+ DPRINTF(MemoryAccess,
+ "WARNING. Data not transfered from Ruby to M5 for type " \
+ "%s\n",
+ RubyRequestType_to_string(srequest->m_type));
+ }
+
+ // If using the RubyTester, update the RubyTester sender state's
+ // subBlock with the received data. The tester will later access
+ // this state.
+ // Note: RubyPort will access its sender state before the
+ // RubyTester.
+ if (m_usingRubyTester) {
+ RubyPort::SenderState *requestSenderState =
+ safe_cast<RubyPort::SenderState*>(pkt->senderState);
+ RubyTester::SenderState* testerSenderState =
+ safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
+ testerSenderState->subBlock.mergeFrom(data);
+ }
+
+ mylist.push_back(pkt);
+ }
+ delete srequest;
+ reqCoalescer.erase(request_line_address);
+ assert(!reqCoalescer.count(request_line_address));
+
+ completeHitCallback(mylist, len);
+}
+
+void
+GPUCoalescer::recordCPReadCallBack(MachineID myMachID, MachineID senderMachID)
+{
+ if (myMachID == senderMachID) {
+ CP_TCPLdHits++;
+ } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
+ CP_TCPLdTransfers++;
+ } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
+ CP_TCCLdHits++;
+ } else {
+ CP_LdMiss++;
+ }
+}
+
+void
+GPUCoalescer::recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID)
+{
+ if (myMachID == senderMachID) {
+ CP_TCPStHits++;
+ } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
+ CP_TCPStTransfers++;
+ } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
+ CP_TCCStHits++;
+ } else {
+ CP_StMiss++;
+ }
+}
+
+void
+GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist, int len)
+{
+ for (int i = 0; i < len; ++i) {
+ RubyPort::SenderState *ss =
+ safe_cast<RubyPort::SenderState *>(mylist[i]->senderState);
+ MemSlavePort *port = ss->port;
+ assert(port != NULL);
+
+ mylist[i]->senderState = ss->predecessor;
+ delete ss;
+ port->hitCallback(mylist[i]);
+ trySendRetries();
+ }
+
+ testDrainComplete();
+}
+
+PacketPtr
+GPUCoalescer::mapAddrToPkt(Addr address)
+{
+ RequestTable::iterator i = m_readRequestTable.find(address);
+ assert(i != m_readRequestTable.end());
+ GPUCoalescerRequest* request = i->second;
+ return request->pkt;
+}
+
+void
+GPUCoalescer::recordMissLatency(GPUCoalescerRequest* srequest,
+ MachineType mach,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool success, bool isRegion)
+{
+ RubyRequestType type = srequest->m_type;
+ Cycles issued_time = srequest->issue_time;
+ Cycles completion_time = curCycle();
+ assert(completion_time >= issued_time);
+ Cycles total_lat = completion_time - issued_time;
+
+ // cache stats (valid for RfO protocol only)
+ if (mach == MachineType_TCP) {
+ if (type == RubyRequestType_LD) {
+ GPU_TCPLdHits++;
+ } else {
+ GPU_TCPStHits++;
+ }
+ } else if (mach == MachineType_L1Cache_wCC) {
+ if (type == RubyRequestType_LD) {
+ GPU_TCPLdTransfers++;
+ } else {
+ GPU_TCPStTransfers++;
+ }
+ } else if (mach == MachineType_TCC) {
+ if (type == RubyRequestType_LD) {
+ GPU_TCCLdHits++;
+ } else {
+ GPU_TCCStHits++;
+ }
+ } else {
+ if (type == RubyRequestType_LD) {
+ GPU_LdMiss++;
+ } else {
+ GPU_StMiss++;
+ }
+ }
+
+ // Profile all access latency, even zero latency accesses
+ m_latencyHist.sample(total_lat);
+ m_typeLatencyHist[type]->sample(total_lat);
+
+ // Profile the miss latency for all non-zero demand misses
+ if (total_lat != Cycles(0)) {
+ m_missLatencyHist.sample(total_lat);
+ m_missTypeLatencyHist[type]->sample(total_lat);
+
+ if (mach != MachineType_NUM) {
+ m_missMachLatencyHist[mach]->sample(total_lat);
+ m_missTypeMachLatencyHist[type][mach]->sample(total_lat);
+
+ if ((issued_time <= initialRequestTime) &&
+ (initialRequestTime <= forwardRequestTime) &&
+ (forwardRequestTime <= firstResponseTime) &&
+ (firstResponseTime <= completion_time)) {
+
+ m_IssueToInitialDelayHist[mach]->sample(
+ initialRequestTime - issued_time);
+ m_InitialToForwardDelayHist[mach]->sample(
+ forwardRequestTime - initialRequestTime);
+ m_ForwardToFirstResponseDelayHist[mach]->sample(
+ firstResponseTime - forwardRequestTime);
+ m_FirstResponseToCompletionDelayHist[mach]->sample(
+ completion_time - firstResponseTime);
+ }
+ }
+
+ }
+
+ DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
+ curTick(), m_version, "Coal",
+ success ? "Done" : "SC_Failed", "", "",
+ printAddress(srequest->pkt->getAddr()), total_lat);
+}
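For a concrete reading of the four delay histograms (hypothetical cycle numbers): a request issued at cycle 100 with initialRequestTime 102, forwardRequestTime 110, firstResponseTime 150 and completion at 160 samples 2, 8, 40 and 10 cycles into the Issue-to-Initial, Initial-to-Forward, Forward-to-FirstResponse and FirstResponse-to-Completion histograms respectively, and 60 cycles into both the total and miss latency histograms.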
+
+void
+GPUCoalescer::regStats()
+{
+ // These statistical variables are not for display.
+ // The profiler will collate these across different
+ // coalescers and display those collated statistics.
+ m_outstandReqHist.init(10);
+ m_latencyHist.init(10);
+ m_missLatencyHist.init(10);
+
+ for (int i = 0; i < RubyRequestType_NUM; i++) {
+ m_typeLatencyHist.push_back(new Stats::Histogram());
+ m_typeLatencyHist[i]->init(10);
+
+ m_missTypeLatencyHist.push_back(new Stats::Histogram());
+ m_missTypeLatencyHist[i]->init(10);
+ }
+
+ for (int i = 0; i < MachineType_NUM; i++) {
+ m_missMachLatencyHist.push_back(new Stats::Histogram());
+ m_missMachLatencyHist[i]->init(10);
+
+ m_IssueToInitialDelayHist.push_back(new Stats::Histogram());
+ m_IssueToInitialDelayHist[i]->init(10);
+
+ m_InitialToForwardDelayHist.push_back(new Stats::Histogram());
+ m_InitialToForwardDelayHist[i]->init(10);
+
+ m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram());
+ m_ForwardToFirstResponseDelayHist[i]->init(10);
+
+ m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram());
+ m_FirstResponseToCompletionDelayHist[i]->init(10);
+ }
+
+ for (int i = 0; i < RubyRequestType_NUM; i++) {
+ m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
+
+ for (int j = 0; j < MachineType_NUM; j++) {
+ m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
+ m_missTypeMachLatencyHist[i][j]->init(10);
+ }
+ }
+
+ // GPU cache stats
+ GPU_TCPLdHits
+ .name(name() + ".gpu_tcp_ld_hits")
+ .desc("loads that hit in the TCP")
+ ;
+ GPU_TCPLdTransfers
+ .name(name() + ".gpu_tcp_ld_transfers")
+ .desc("TCP to TCP load transfers")
+ ;
+ GPU_TCCLdHits
+ .name(name() + ".gpu_tcc_ld_hits")
+ .desc("loads that hit in the TCC")
+ ;
+ GPU_LdMiss
+ .name(name() + ".gpu_ld_misses")
+ .desc("loads that miss in the GPU")
+ ;
+
+ GPU_TCPStHits
+ .name(name() + ".gpu_tcp_st_hits")
+ .desc("stores that hit in the TCP")
+ ;
+ GPU_TCPStTransfers
+ .name(name() + ".gpu_tcp_st_transfers")
+ .desc("TCP to TCP store transfers")
+ ;
+ GPU_TCCStHits
+ .name(name() + ".gpu_tcc_st_hits")
+ .desc("stores that hit in the TCC")
+ ;
+ GPU_StMiss
+ .name(name() + ".gpu_st_misses")
+ .desc("stores that miss in the GPU")
+ ;
+
+ // CP cache stats
+ CP_TCPLdHits
+ .name(name() + ".cp_tcp_ld_hits")
+ .desc("loads that hit in the TCP")
+ ;
+ CP_TCPLdTransfers
+ .name(name() + ".cp_tcp_ld_transfers")
+ .desc("TCP to TCP load transfers")
+ ;
+ CP_TCCLdHits
+ .name(name() + ".cp_tcc_ld_hits")
+ .desc("loads that hit in the TCC")
+ ;
+ CP_LdMiss
+ .name(name() + ".cp_ld_misses")
+ .desc("loads that miss in the GPU")
+ ;
+
+ CP_TCPStHits
+ .name(name() + ".cp_tcp_st_hits")
+ .desc("stores that hit in the TCP")
+ ;
+ CP_TCPStTransfers
+ .name(name() + ".cp_tcp_st_transfers")
+ .desc("TCP to TCP store transfers")
+ ;
+ CP_TCCStHits
+ .name(name() + ".cp_tcc_st_hits")
+ .desc("stores that hit in the TCC")
+ ;
+ CP_StMiss
+ .name(name() + ".cp_st_misses")
+ .desc("stores that miss in the GPU")
+ ;
+}
diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh
new file mode 100644
index 000000000..dbd47059c
--- /dev/null
+++ b/src/mem/ruby/system/GPUCoalescer.hh
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+#define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+
+#include <iostream>
+#include <unordered_map>
+
+#include "base/statistics.hh"
+#include "mem/protocol/HSAScope.hh"
+#include "mem/protocol/HSASegment.hh"
+#include "mem/protocol/PrefetchBit.hh"
+#include "mem/protocol/RubyAccessMode.hh"
+#include "mem/protocol/RubyRequestType.hh"
+#include "mem/protocol/SequencerRequestType.hh"
+#include "mem/request.hh"
+#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/Consumer.hh"
+#include "mem/ruby/system/RubyPort.hh"
+
+class DataBlock;
+class CacheMsg;
+class MachineID;
+class CacheMemory;
+
+class RubyGPUCoalescerParams;
+
+HSAScope reqScopeToHSAScope(Request* req);
+HSASegment reqSegmentToHSASegment(Request* req);
+
+struct GPUCoalescerRequest
+{
+ PacketPtr pkt;
+ RubyRequestType m_type;
+ Cycles issue_time;
+
+ GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type,
+ Cycles _issue_time)
+ : pkt(_pkt), m_type(_m_type), issue_time(_issue_time)
+ {}
+};
+
+std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj);
+
+class GPUCoalescer : public RubyPort
+{
+ public:
+ typedef RubyGPUCoalescerParams Params;
+ GPUCoalescer(const Params *);
+ ~GPUCoalescer();
+
+ // Public Methods
+ void wakeup(); // Used only for deadlock detection
+
+ void printProgress(std::ostream& out) const;
+ void resetStats();
+ void collateStats();
+ void regStats();
+
+ void writeCallback(Addr address, DataBlock& data);
+
+ void writeCallback(Addr address,
+ MachineType mach,
+ DataBlock& data);
+
+ void writeCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool isRegion);
+
+ void writeCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime);
+
+ void readCallback(Addr address, DataBlock& data);
+
+ void readCallback(Addr address,
+ MachineType mach,
+ DataBlock& data);
+
+ void readCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime);
+
+ void readCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool isRegion);
+ /* atomics need their own callback because the data
+ might be const coming from SLICC */
+ void atomicCallback(Addr address,
+ MachineType mach,
+ const DataBlock& data);
+
+ void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID);
+ void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID);
+
+ // Alternate implementations in VIPER Coalescer
+ virtual RequestStatus makeRequest(PacketPtr pkt);
+
+ int outstandingCount() const { return m_outstanding_count; }
+
+ bool
+ isDeadlockEventScheduled() const
+ {
+ return deadlockCheckEvent.scheduled();
+ }
+
+ void
+ descheduleDeadlockEvent()
+ {
+ deschedule(deadlockCheckEvent);
+ }
+
+ bool empty() const;
+
+ void print(std::ostream& out) const;
+ void checkCoherence(Addr address);
+
+ void markRemoved();
+ void removeRequest(GPUCoalescerRequest* request);
+ void evictionCallback(Addr address);
+ void completeIssue();
+
+ void insertKernel(int wavefront_id, PacketPtr pkt);
+
+ void recordRequestType(SequencerRequestType requestType);
+ Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
+
+ Stats::Histogram& getLatencyHist() { return m_latencyHist; }
+ Stats::Histogram& getTypeLatencyHist(uint32_t t)
+ { return *m_typeLatencyHist[t]; }
+
+ Stats::Histogram& getMissLatencyHist()
+ { return m_missLatencyHist; }
+ Stats::Histogram& getMissTypeLatencyHist(uint32_t t)
+ { return *m_missTypeLatencyHist[t]; }
+
+ Stats::Histogram& getMissMachLatencyHist(uint32_t t) const
+ { return *m_missMachLatencyHist[t]; }
+
+ Stats::Histogram&
+ getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
+ { return *m_missTypeMachLatencyHist[r][t]; }
+
+ Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const
+ { return *m_IssueToInitialDelayHist[t]; }
+
+ Stats::Histogram&
+ getInitialToForwardDelayHist(const MachineType t) const
+ { return *m_InitialToForwardDelayHist[t]; }
+
+ Stats::Histogram&
+ getForwardRequestToFirstResponseHist(const MachineType t) const
+ { return *m_ForwardToFirstResponseDelayHist[t]; }
+
+ Stats::Histogram&
+ getFirstResponseToCompletionDelayHist(const MachineType t) const
+ { return *m_FirstResponseToCompletionDelayHist[t]; }
+
+ // Changed to protected to enable inheritance by VIPER Coalescer
+ protected:
+ bool tryCacheAccess(Addr addr, RubyRequestType type,
+ Addr pc, RubyAccessMode access_mode,
+ int size, DataBlock*& data_ptr);
+ // Alternate implementations in VIPER Coalescer
+ virtual void issueRequest(PacketPtr pkt, RubyRequestType type);
+
+    void kernelCallback(int wavefront_id);
+
+ void hitCallback(GPUCoalescerRequest* request,
+ MachineType mach,
+ DataBlock& data,
+ bool success,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool isRegion);
+ void recordMissLatency(GPUCoalescerRequest* request,
+ MachineType mach,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool success, bool isRegion);
+ void completeHitCallback(std::vector<PacketPtr> & mylist, int len);
+ PacketPtr mapAddrToPkt(Addr address);
+
+
+ RequestStatus getRequestStatus(PacketPtr pkt,
+ RubyRequestType request_type);
+ bool insertRequest(PacketPtr pkt, RubyRequestType request_type);
+
+ bool handleLlsc(Addr address, GPUCoalescerRequest* request);
+
+    // Unimplemented copy constructor and assignment operator disallow copying
+ GPUCoalescer(const GPUCoalescer& obj);
+ GPUCoalescer& operator=(const GPUCoalescer& obj);
+
+ class IssueEvent : public Event
+ {
+ private:
+ GPUCoalescer *seq;
+ public:
+ IssueEvent(GPUCoalescer *_seq);
+ void process();
+ const char *description() const;
+ };
+
+ IssueEvent issueEvent;
+
+
+ // Changed to protected to enable inheritance by VIPER Coalescer
+ protected:
+ int m_max_outstanding_requests;
+ int m_deadlock_threshold;
+
+ CacheMemory* m_dataCache_ptr;
+ CacheMemory* m_instCache_ptr;
+
+    // The cache access latency for this GPU data cache, assessed at the
+    // beginning of each access. This mirrors the equivalent latency in
+    // Sequencer, which this class closely resembles.
+ Cycles m_data_cache_hit_latency;
+
+ // We need to track both the primary and secondary request types.
+ // The secondary request type comprises a subset of RubyRequestTypes that
+ // are understood by the L1 Controller. A primary request type can be any
+ // RubyRequestType.
+ enum {PrimaryType, SecondaryType};
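+    // reqCoalescer maps a cache-line address to the requests coalesced
+    // into the pending access to that line; newRequests tracks lines
+    // whose first request has not been issued yet.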
+ typedef std::pair<PacketPtr, std::vector<RubyRequestType> > RequestDesc;
+ typedef std::unordered_map<Addr, std::vector<RequestDesc> > CoalescingTable;
+ CoalescingTable reqCoalescer;
+ std::vector<Addr> newRequests;
+
+ typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable;
+ RequestTable m_writeRequestTable;
+ RequestTable m_readRequestTable;
+ // Global outstanding request count, across all request tables
+ int m_outstanding_count;
+ bool m_deadlock_check_scheduled;
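+    // Kernel-end packets, keyed by wavefront id, are held here until the
+    // kernel's outstanding requests drain; completeIssue() responds to
+    // the ids queued in newKernelEnds.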
+ std::unordered_map<int, PacketPtr> kernelEndList;
+ std::vector<int> newKernelEnds;
+
+ int m_store_waiting_on_load_cycles;
+ int m_store_waiting_on_store_cycles;
+ int m_load_waiting_on_store_cycles;
+ int m_load_waiting_on_load_cycles;
+
+ bool m_usingNetworkTester;
+
+ class GPUCoalescerWakeupEvent : public Event
+ {
+ private:
+ GPUCoalescer *m_GPUCoalescer_ptr;
+
+ public:
+ GPUCoalescerWakeupEvent(GPUCoalescer *_seq) :
+ m_GPUCoalescer_ptr(_seq) {}
+ void process() { m_GPUCoalescer_ptr->wakeup(); }
+ const char *description() const
+ {
+ return "GPUCoalescer deadlock check";
+ }
+ };
+
+ GPUCoalescerWakeupEvent deadlockCheckEvent;
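+    // Mirrors the assume_rfo Python parameter: true when the underlying
+    // protocol provides Read-for-Ownership coherence.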
+ bool assumingRfOCoherence;
+
+ // m5 style stats for TCP hit/miss counts
+ Stats::Scalar GPU_TCPLdHits;
+ Stats::Scalar GPU_TCPLdTransfers;
+ Stats::Scalar GPU_TCCLdHits;
+ Stats::Scalar GPU_LdMiss;
+
+ Stats::Scalar GPU_TCPStHits;
+ Stats::Scalar GPU_TCPStTransfers;
+ Stats::Scalar GPU_TCCStHits;
+ Stats::Scalar GPU_StMiss;
+
+ Stats::Scalar CP_TCPLdHits;
+ Stats::Scalar CP_TCPLdTransfers;
+ Stats::Scalar CP_TCCLdHits;
+ Stats::Scalar CP_LdMiss;
+
+ Stats::Scalar CP_TCPStHits;
+ Stats::Scalar CP_TCPStTransfers;
+ Stats::Scalar CP_TCCStHits;
+ Stats::Scalar CP_StMiss;
+
+ //! Histogram for number of outstanding requests per cycle.
+ Stats::Histogram m_outstandReqHist;
+
+ //! Histogram for holding latency profile of all requests.
+ Stats::Histogram m_latencyHist;
+ std::vector<Stats::Histogram *> m_typeLatencyHist;
+
+ //! Histogram for holding latency profile of all requests that
+ //! miss in the controller connected to this sequencer.
+ Stats::Histogram m_missLatencyHist;
+ std::vector<Stats::Histogram *> m_missTypeLatencyHist;
+
+ //! Histograms for profiling the latencies for requests that
+ //! required external messages.
+ std::vector<Stats::Histogram *> m_missMachLatencyHist;
+ std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist;
+
+ //! Histograms for recording the breakdown of miss latency
+ std::vector<Stats::Histogram *> m_IssueToInitialDelayHist;
+ std::vector<Stats::Histogram *> m_InitialToForwardDelayHist;
+ std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist;
+ std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist;
+};
+
+inline std::ostream&
+operator<<(std::ostream& out, const GPUCoalescer& obj)
+{
+ obj.print(out);
+ out << std::flush;
+ return out;
+}
+
+#endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+
diff --git a/src/mem/ruby/system/GPUCoalescer.py b/src/mem/ruby/system/GPUCoalescer.py
new file mode 100644
index 000000000..0c19f875d
--- /dev/null
+++ b/src/mem/ruby/system/GPUCoalescer.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2015 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Steve Reinhardt
+# Brad Beckmann
+
+from m5.params import *
+from m5.proxy import *
+from Sequencer import *
+
+class RubyGPUCoalescer(RubySequencer):
+ type = 'RubyGPUCoalescer'
+ cxx_class = 'GPUCoalescer'
+ cxx_header = "mem/ruby/system/GPUCoalescer.hh"
+
+ # max_outstanding_requests = (wave front slots) x (wave front size)
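+    # (with the default 40 slots x 64 work-items below, this is 2560)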
+ max_outstanding_requests = Param.Int(40*64,
+ "max requests (incl. prefetches) outstanding")
+    assume_rfo = Param.Bool(True, "assume protocol implements Read for "
+                            "Ownership coherence")
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 5a5f528bb..bf4002126 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -60,7 +60,8 @@ RubyPort::RubyPort(const Params *p)
memSlavePort(csprintf("%s-mem-slave-port", name()), this,
p->ruby_system->getAccessBackingStore(), -1,
p->no_retry_on_stall),
- gotAddrRanges(p->port_master_connection_count)
+ gotAddrRanges(p->port_master_connection_count),
+ m_isCPUSequencer(p->is_cpu_sequencer)
{
assert(m_version != -1);
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index 07e0fde5a..6bd92b654 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -167,6 +167,8 @@ class RubyPort : public MemObject
uint32_t getId() { return m_version; }
DrainState drain() override;
+ bool isCPUSequencer() { return m_isCPUSequencer; }
+
protected:
void trySendRetries();
void ruby_hit_callback(PacketPtr pkt);
@@ -218,6 +220,8 @@ class RubyPort : public MemObject
// that should be called when the Sequencer becomes available after a stall.
//
std::vector<MemSlavePort *> retryList;
+
+ bool m_isCPUSequencer;
};
#endif // __MEM_RUBY_SYSTEM_RUBYPORT_HH__
diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc
index 1ecd2e098..e1717e519 100644
--- a/src/mem/ruby/system/RubySystem.cc
+++ b/src/mem/ruby/system/RubySystem.cc
@@ -107,7 +107,7 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
Sequencer* sequencer_ptr = NULL;
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
- sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
+ sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getCPUSequencer());
if (sequencer_ptr == NULL) {
sequencer_ptr = sequencer_map[cntrl];
}
diff --git a/src/mem/ruby/system/SConscript b/src/mem/ruby/system/SConscript
index 8c5077362..b67311bca 100644
--- a/src/mem/ruby/system/SConscript
+++ b/src/mem/ruby/system/SConscript
@@ -33,12 +33,22 @@ Import('*')
if env['PROTOCOL'] == 'None':
Return()
+if env['BUILD_GPU']:
+ SimObject('GPUCoalescer.py')
SimObject('RubySystem.py')
SimObject('Sequencer.py')
+SimObject('WeightedLRUReplacementPolicy.py')
+if env['BUILD_GPU']:
+ SimObject('VIPERCoalescer.py')
Source('CacheRecorder.cc')
Source('DMASequencer.cc')
+if env['BUILD_GPU']:
+ Source('GPUCoalescer.cc')
Source('RubyPort.cc')
Source('RubyPortProxy.cc')
Source('RubySystem.cc')
Source('Sequencer.cc')
+if env['BUILD_GPU']:
+ Source('VIPERCoalescer.cc')
+Source('WeightedLRUPolicy.cc')
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 50418c700..c2727b41d 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -63,6 +63,7 @@ Sequencer::Sequencer(const Params *p)
m_max_outstanding_requests = p->max_outstanding_requests;
m_deadlock_threshold = p->deadlock_threshold;
+ m_coreId = p->coreid; // for tracking the two CorePair sequencers
assert(m_max_outstanding_requests > 0);
assert(m_deadlock_threshold > 0);
assert(m_instCache_ptr != NULL);
@@ -593,6 +594,8 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
ContextID proc_id = pkt->req->hasContextId() ?
pkt->req->contextId() : InvalidContextID;
+ ContextID core_id = coreId();
+
// If valid, copy the pc to the ruby request
Addr pc = 0;
if (pkt->req->hasPC()) {
@@ -607,7 +610,7 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
nullptr : pkt->getPtr<uint8_t>(),
pkt->getSize(), pc, secondary_type,
RubyAccessMode_Supervisor, pkt,
- PrefetchBit_No, proc_id);
+ PrefetchBit_No, proc_id, core_id);
DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\n",
curTick(), m_version, "Seq", "Begin", "", "",
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index 47af7ea1e..2a2f49587 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -99,6 +99,7 @@ class Sequencer : public RubyPort
void markRemoved();
void evictionCallback(Addr address);
void invalidateSC(Addr address);
+ int coreId() const { return m_coreId; }
void recordRequestType(SequencerRequestType requestType);
Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
@@ -198,6 +199,8 @@ class Sequencer : public RubyPort
Stats::Scalar m_load_waiting_on_store;
Stats::Scalar m_load_waiting_on_load;
+ int m_coreId;
+
bool m_usingNetworkTester;
//! Histogram for number of outstanding requests per cycle.
diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py
index 7c90eb29c..d6ee0aa2f 100644
--- a/src/mem/ruby/system/Sequencer.py
+++ b/src/mem/ruby/system/Sequencer.py
@@ -32,54 +32,58 @@ from m5.proxy import *
from MemObject import MemObject
class RubyPort(MemObject):
- type = 'RubyPort'
- abstract = True
- cxx_header = "mem/ruby/system/RubyPort.hh"
- version = Param.Int(0, "")
+ type = 'RubyPort'
+ abstract = True
+ cxx_header = "mem/ruby/system/RubyPort.hh"
+ version = Param.Int(0, "")
- slave = VectorSlavePort("CPU slave port")
- master = VectorMasterPort("CPU master port")
- pio_master_port = MasterPort("Ruby mem master port")
- mem_master_port = MasterPort("Ruby mem master port")
- pio_slave_port = SlavePort("Ruby pio slave port")
- mem_slave_port = SlavePort("Ruby memory port")
+ slave = VectorSlavePort("CPU slave port")
+ master = VectorMasterPort("CPU master port")
+ pio_master_port = MasterPort("Ruby mem master port")
+ mem_master_port = MasterPort("Ruby mem master port")
+ pio_slave_port = SlavePort("Ruby pio slave port")
+ mem_slave_port = SlavePort("Ruby memory port")
- using_ruby_tester = Param.Bool(False, "")
- no_retry_on_stall = Param.Bool(False, "")
- ruby_system = Param.RubySystem(Parent.any, "")
- system = Param.System(Parent.any, "system object")
- support_data_reqs = Param.Bool(True, "data cache requests supported")
- support_inst_reqs = Param.Bool(True, "inst cache requests supported")
+ using_ruby_tester = Param.Bool(False, "")
+ no_retry_on_stall = Param.Bool(False, "")
+ ruby_system = Param.RubySystem(Parent.any, "")
+ system = Param.System(Parent.any, "system object")
+ support_data_reqs = Param.Bool(True, "data cache requests supported")
+ support_inst_reqs = Param.Bool(True, "inst cache requests supported")
+ is_cpu_sequencer = Param.Bool(True, "connected to a cpu")
class RubyPortProxy(RubyPort):
- type = 'RubyPortProxy'
- cxx_header = "mem/ruby/system/RubyPortProxy.hh"
+ type = 'RubyPortProxy'
+ cxx_header = "mem/ruby/system/RubyPortProxy.hh"
class RubySequencer(RubyPort):
- type = 'RubySequencer'
- cxx_class = 'Sequencer'
- cxx_header = "mem/ruby/system/Sequencer.hh"
+ type = 'RubySequencer'
+ cxx_class = 'Sequencer'
+ cxx_header = "mem/ruby/system/Sequencer.hh"
- icache = Param.RubyCache("")
- dcache = Param.RubyCache("")
- # Cache latencies currently assessed at the beginning of each access
- # NOTE: Setting these values to a value greater than one will result in
- # O3 CPU pipeline bubbles and negatively impact performance
- # TODO: Latencies should be migrated into each top-level cache controller
- icache_hit_latency = Param.Cycles(1, "Inst cache hit latency")
- dcache_hit_latency = Param.Cycles(1, "Data cache hit latency")
- max_outstanding_requests = Param.Int(16,
- "max requests (incl. prefetches) outstanding")
- deadlock_threshold = Param.Cycles(500000,
- "max outstanding cycles for a request before deadlock/livelock declared")
- using_network_tester = Param.Bool(False, "")
+ icache = Param.RubyCache("")
+ dcache = Param.RubyCache("")
+ # Cache latencies currently assessed at the beginning of each access
+ # NOTE: Setting these values to a value greater than one will result in
+ # O3 CPU pipeline bubbles and negatively impact performance
+ # TODO: Latencies should be migrated into each top-level cache controller
+ icache_hit_latency = Param.Cycles(1, "Inst cache hit latency")
+ dcache_hit_latency = Param.Cycles(1, "Data cache hit latency")
+ max_outstanding_requests = Param.Int(16,
+ "max requests (incl. prefetches) outstanding")
+ deadlock_threshold = Param.Cycles(500000,
+ "max outstanding cycles for a request before deadlock/livelock declared")
+ using_network_tester = Param.Bool(False, "")
+ # id used by protocols that support multiple sequencers per controller
+ # 99 is the dummy default value
+ coreid = Param.Int(99, "CorePair core id")
class DMASequencer(MemObject):
- type = 'DMASequencer'
- cxx_header = "mem/ruby/system/DMASequencer.hh"
+ type = 'DMASequencer'
+ cxx_header = "mem/ruby/system/DMASequencer.hh"
- version = Param.Int(0, "")
- slave = SlavePort("Device slave port")
- using_ruby_tester = Param.Bool(False, "")
- ruby_system = Param.RubySystem(Parent.any, "")
- system = Param.System(Parent.any, "system object")
+ version = Param.Int(0, "")
+ slave = SlavePort("Device slave port")
+ using_ruby_tester = Param.Bool(False, "")
+ ruby_system = Param.RubySystem(Parent.any, "")
+ system = Param.System(Parent.any, "system object")
diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc
new file mode 100644
index 000000000..ca91f2723
--- /dev/null
+++ b/src/mem/ruby/system/VIPERCoalescer.cc
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#include "base/misc.hh"
+#include "base/str.hh"
+#include "config/the_isa.hh"
+
+#if THE_ISA == X86_ISA
+#include "arch/x86/insts/microldstop.hh"
+
+#endif // X86_ISA
+#include "mem/ruby/system/VIPERCoalescer.hh"
+
+#include "cpu/testers/rubytest/RubyTester.hh"
+#include "debug/GPUCoalescer.hh"
+#include "debug/MemoryAccess.hh"
+#include "mem/packet.hh"
+#include "mem/ruby/common/SubBlock.hh"
+#include "mem/ruby/network/MessageBuffer.hh"
+#include "mem/ruby/profiler/Profiler.hh"
+#include "mem/ruby/slicc_interface/AbstractController.hh"
+#include "mem/ruby/slicc_interface/RubyRequest.hh"
+#include "mem/ruby/structures/CacheMemory.hh"
+#include "mem/ruby/system/GPUCoalescer.hh"
+#include "mem/ruby/system/RubySystem.hh"
+#include "params/VIPERCoalescer.hh"
+
+using namespace std;
+
+VIPERCoalescer *
+VIPERCoalescerParams::create()
+{
+ return new VIPERCoalescer(this);
+}
+
+VIPERCoalescer::VIPERCoalescer(const Params *p)
+ : GPUCoalescer(p)
+{
+    m_max_wb_per_cycle = p->max_wb_per_cycle;
+    m_max_inv_per_cycle = p->max_inv_per_cycle;
+ m_outstanding_inv = 0;
+ m_outstanding_wb = 0;
+}
+
+VIPERCoalescer::~VIPERCoalescer()
+{
+}
+
+// Analyzes the packet to see if this request can be coalesced.
+// If request can be coalesced, this request is added to the reqCoalescer table
+// and makeRequest returns RequestStatus_Issued;
+// If this is the first request to a cacheline, request is added to both
+// newRequests queue and to the reqCoalescer table; makeRequest
+// returns RequestStatus_Issued.
+// If there is a pending request to this cacheline and this request
+// can't be coalesced, RequestStatus_Aliased is returned and
+// the packet needs to be reissued.
+RequestStatus
+VIPERCoalescer::makeRequest(PacketPtr pkt)
+{
+    if (m_outstanding_wb || m_outstanding_inv) {
+        DPRINTF(GPUCoalescer,
+            "There are %d Writebacks and %d Invalidations\n",
+            m_outstanding_wb, m_outstanding_inv);
+ }
+    // Are we in the middle of a release?
+    if (m_outstanding_wb > 0) {
+ if (pkt->req->isKernel()) {
+            // Everything is fine
+            // Barriers and Kernel Ends can coalesce
+            // If it is a Kernel Begin, flush the cache
+ if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
+ invL1();
+ }
+
+ if (pkt->req->isRelease()) {
+ insertKernel(pkt->req->contextId(), pkt);
+ }
+
+ return RequestStatus_Issued;
+ }
+// return RequestStatus_Aliased;
+ } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
+ // Flush Dirty Data on Kernel End
+ // isKernel + isRelease
+ insertKernel(pkt->req->contextId(), pkt);
+ wbL1();
+        if (m_outstanding_wb == 0) {
+ for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
+ newKernelEnds.push_back(it->first);
+ }
+ completeIssue();
+ }
+ return RequestStatus_Issued;
+ }
+ RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
+    if (requestStatus != RequestStatus_Issued) {
+        // Request not issued
+        // enqueue Retry
+        DPRINTF(GPUCoalescer, "Request not issued by GPUCoalescer\n");
+ return requestStatus;
+ } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
+ // Invalidate clean Data on Kernel Begin
+ // isKernel + isAcquire
+ invL1();
+ } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
+ // Deschedule the AtomicAcqRel and
+ // Flush and Invalidate the L1 cache
+ invwbL1();
+ if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
+ DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
+ deschedule(issueEvent);
+ }
+ } else if (pkt->req->isRelease()) {
+ // Deschedule the StoreRel and
+ // Flush the L1 cache
+ wbL1();
+ if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
+ DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
+ deschedule(issueEvent);
+ }
+ } else if (pkt->req->isAcquire()) {
+ // LoadAcq or AtomicAcq
+ // Invalidate the L1 cache
+ invL1();
+ }
+ // Request was successful
+ if (m_outstanding_wb == 0) {
+ if (!issueEvent.scheduled()) {
+ DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
+ schedule(issueEvent, curTick());
+ }
+ }
+ return RequestStatus_Issued;
+}
+
+void
+VIPERCoalescer::wbCallback(Addr addr)
+{
+ m_outstanding_wb--;
+    // if L1 Flush Complete
+    // attempt to schedule issueEvent
+ assert(((int) m_outstanding_wb) >= 0);
+ if (m_outstanding_wb == 0) {
+ for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
+ newKernelEnds.push_back(it->first);
+ }
+ completeIssue();
+ }
+ trySendRetries();
+}
+
+void
+VIPERCoalescer::invCallback(Addr addr)
+{
+ m_outstanding_inv--;
+    // if L1 Flush Complete
+    // attempt to schedule issueEvent
+    // This probably won't happen, since
+    // we don't wait on cache invalidations
+ if (m_outstanding_wb == 0) {
+ for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
+ newKernelEnds.push_back(it->first);
+ }
+ completeIssue();
+ }
+ trySendRetries();
+}
+
+/**
+ * Invalidate L1 cache (Acquire)
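+ *
+ * Issues a REPLACEMENT request for every block in the data cache. Unlike
+ * writebacks, outstanding invalidations are not waited on before issue
+ * resumes (see invCallback()).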
+ */
+void
+VIPERCoalescer::invL1()
+{
+ int size = m_dataCache_ptr->getNumBlocks();
+ DPRINTF(GPUCoalescer,
+ "There are %d Invalidations outstanding before Cache Walk\n",
+ m_outstanding_inv);
+ // Walk the cache
+ for (int i = 0; i < size; i++) {
+ Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
+ // Evict Read-only data
+ std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
+ clockEdge(), addr, (uint8_t*) 0, 0, 0,
+ RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
+ nullptr);
+ assert(m_mandatory_q_ptr != NULL);
+ m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+ m_outstanding_inv++;
+ }
+ DPRINTF(GPUCoalescer,
+ "There are %d Invalidatons outstanding after Cache Walk\n",
+ m_outstanding_inv);
+}
+
+/**
+ * Writeback L1 cache (Release)
+ */
+void
+VIPERCoalescer::wbL1()
+{
+ int size = m_dataCache_ptr->getNumBlocks();
+ DPRINTF(GPUCoalescer,
+ "There are %d Writebacks outstanding before Cache Walk\n",
+ m_outstanding_wb);
+ // Walk the cache
+ for (int i = 0; i < size; i++) {
+ Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
+ // Write dirty data back
+ std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
+ clockEdge(), addr, (uint8_t*) 0, 0, 0,
+ RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
+ nullptr);
+ assert(m_mandatory_q_ptr != NULL);
+ m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+ m_outstanding_wb++;
+ }
+ DPRINTF(GPUCoalescer,
+ "There are %d Writebacks outstanding after Cache Walk\n",
+ m_outstanding_wb);
+}
+
+/**
+ * Invalidate and Writeback L1 cache (Acquire&Release)
+ */
+void
+VIPERCoalescer::invwbL1()
+{
+ int size = m_dataCache_ptr->getNumBlocks();
+ // Walk the cache
+    for (int i = 0; i < size; i++) {
+ Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
+ // Evict Read-only data
+ std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
+ clockEdge(), addr, (uint8_t*) 0, 0, 0,
+ RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
+ nullptr);
+ assert(m_mandatory_q_ptr != NULL);
+ m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+ m_outstanding_inv++;
+ }
+ // Walk the cache
+    for (int i = 0; i < size; i++) {
+ Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
+ // Write dirty data back
+ std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
+ clockEdge(), addr, (uint8_t*) 0, 0, 0,
+ RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
+ nullptr);
+ assert(m_mandatory_q_ptr != NULL);
+ m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+ m_outstanding_wb++;
+ }
+}
diff --git a/src/mem/ruby/system/VIPERCoalescer.hh b/src/mem/ruby/system/VIPERCoalescer.hh
new file mode 100644
index 000000000..af6e44e7f
--- /dev/null
+++ b/src/mem/ruby/system/VIPERCoalescer.hh
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#ifndef __MEM_RUBY_SYSTEM_VI_COALESCER_HH__
+#define __MEM_RUBY_SYSTEM_VI_COALESCER_HH__
+
+#include <iostream>
+
+#include "mem/protocol/PrefetchBit.hh"
+#include "mem/protocol/RubyAccessMode.hh"
+#include "mem/protocol/RubyRequestType.hh"
+#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/Consumer.hh"
+#include "mem/ruby/system/GPUCoalescer.hh"
+#include "mem/ruby/system/RubyPort.hh"
+
+class DataBlock;
+class CacheMsg;
+class MachineID;
+class CacheMemory;
+
+class VIPERCoalescerParams;
+
+class VIPERCoalescer : public GPUCoalescer
+{
+ public:
+ typedef VIPERCoalescerParams Params;
+ VIPERCoalescer(const Params *);
+ ~VIPERCoalescer();
+ void wbCallback(Addr address);
+ void invCallback(Addr address);
+ RequestStatus makeRequest(PacketPtr pkt);
+ private:
+ void invL1();
+ void wbL1();
+ void invwbL1();
+ uint64_t m_outstanding_inv;
+ uint64_t m_outstanding_wb;
+ uint64_t m_max_inv_per_cycle;
+ uint64_t m_max_wb_per_cycle;
+};
+#endif // __MEM_RUBY_SYSTEM_VI_COALESCER_HH__
+
diff --git a/src/mem/ruby/system/VIPERCoalescer.py b/src/mem/ruby/system/VIPERCoalescer.py
new file mode 100644
index 000000000..05c74386f
--- /dev/null
+++ b/src/mem/ruby/system/VIPERCoalescer.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2015 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Steve Reinhardt
+# Brad Beckmann
+
+from m5.params import *
+from m5.proxy import *
+from GPUCoalescer import *
+
+class VIPERCoalescer(RubyGPUCoalescer):
+ type = 'VIPERCoalescer'
+ cxx_class = 'VIPERCoalescer'
+ cxx_header = "mem/ruby/system/VIPERCoalescer.hh"
+ max_inv_per_cycle = Param.Int(32, "max invalidations per cycle")
+ max_wb_per_cycle = Param.Int(32, "max writebacks per cycle")
+ assume_rfo = False
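+
+# A hypothetical configuration sketch (illustrative names, not part of
+# this patch): each compute unit's TCP could attach one coalescer, e.g.
+#   coalescer = VIPERCoalescer(version = i, icache = tcp, dcache = tcp,
+#                              max_inv_per_cycle = 32, max_wb_per_cycle = 32)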
diff --git a/src/mem/ruby/system/WeightedLRUPolicy.cc b/src/mem/ruby/system/WeightedLRUPolicy.cc
new file mode 100644
index 000000000..5baa4d9a5
--- /dev/null
+++ b/src/mem/ruby/system/WeightedLRUPolicy.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Derek Hower
+ */
+
+#include "mem/ruby/system/WeightedLRUPolicy.hh"
+
+WeightedLRUPolicy::WeightedLRUPolicy(const Params* p)
+ : AbstractReplacementPolicy(p), m_cache(p->cache)
+{
+ m_last_occ_ptr = new int*[m_num_sets];
+    for (unsigned i = 0; i < m_num_sets; i++) {
+        m_last_occ_ptr[i] = new int[m_assoc];
+        for (unsigned j = 0; j < m_assoc; j++) {
+ m_last_occ_ptr[i][j] = 0;
+ }
+ }
+}
+
+WeightedLRUPolicy *
+WeightedLRUReplacementPolicyParams::create()
+{
+ return new WeightedLRUPolicy(this);
+}
+
+WeightedLRUPolicy::~WeightedLRUPolicy()
+{
+    if (m_last_occ_ptr != NULL) {
+        for (unsigned i = 0; i < m_num_sets; i++) {
+            if (m_last_occ_ptr[i] != NULL) {
+ delete[] m_last_occ_ptr[i];
+ }
+ }
+ delete[] m_last_occ_ptr;
+ }
+}
+
+void
+WeightedLRUPolicy::touch(int64_t set, int64_t index, Tick time)
+{
+ assert(index >= 0 && index < m_assoc);
+ assert(set >= 0 && set < m_num_sets);
+
+ m_last_ref_ptr[set][index] = time;
+}
+
+void
+WeightedLRUPolicy::touch(int64_t set, int64_t index, Tick time, int occupancy)
+{
+ assert(index >= 0 && index < m_assoc);
+ assert(set >= 0 && set < m_num_sets);
+
+ m_last_ref_ptr[set][index] = time;
+ m_last_occ_ptr[set][index] = occupancy;
+}
+
+int64_t
+WeightedLRUPolicy::getVictim(int64_t set) const
+{
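+    // Victim selection: choose the way with the smallest occupancy
+    // weight; among equally weighted ways, evict the least recently
+    // touched one.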
+ Tick time, smallest_time;
+ int64_t smallest_index;
+
+ smallest_index = 0;
+ smallest_time = m_last_ref_ptr[set][0];
+    int smallest_weight = m_last_occ_ptr[set][0];
+
+ for (unsigned i = 1; i < m_assoc; i++) {
+
+ int weight = m_last_occ_ptr[set][i];
+ if (weight < smallest_weight) {
+ smallest_weight = weight;
+ smallest_index = i;
+ smallest_time = m_last_ref_ptr[set][i];
+ } else if (weight == smallest_weight) {
+ time = m_last_ref_ptr[set][i];
+ if (time < smallest_time) {
+ smallest_index = i;
+ smallest_time = time;
+ }
+ }
+ }
+ return smallest_index;
+}
diff --git a/src/mem/ruby/system/WeightedLRUPolicy.hh b/src/mem/ruby/system/WeightedLRUPolicy.hh
new file mode 100644
index 000000000..3150779b2
--- /dev/null
+++ b/src/mem/ruby/system/WeightedLRUPolicy.hh
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#ifndef __MEM_RUBY_SYSTEM_WEIGHTEDLRUPOLICY_HH__
+#define __MEM_RUBY_SYSTEM_WEIGHTEDLRUPOLICY_HH__
+
+#include "mem/ruby/structures/AbstractReplacementPolicy.hh"
+#include "mem/ruby/structures/CacheMemory.hh"
+#include "params/WeightedLRUReplacementPolicy.hh"
+
+/* LRU replacement policy weighted by a per-block occupancy value */
+
+class WeightedLRUPolicy : public AbstractReplacementPolicy
+{
+ public:
+ typedef WeightedLRUReplacementPolicyParams Params;
+ WeightedLRUPolicy(const Params* p);
+ ~WeightedLRUPolicy();
+
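+    // The four-argument touch() records a caller-supplied occupancy
+    // weight alongside the access time; getVictim() evicts the way with
+    // the smallest weight, breaking ties by least-recent access.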
+ void touch(int64_t set, int64_t way, Tick time);
+ void touch(int64_t set, int64_t way, Tick time, int occupancy);
+ int64_t getVictim(int64_t set) const override;
+
+ bool useOccupancy() const { return true; }
+
+ CacheMemory * m_cache;
+ int **m_last_occ_ptr;
+};
+
+#endif // __MEM_RUBY_SYSTEM_WEIGHTEDLRUPOLICY_HH__
diff --git a/src/mem/ruby/system/WeightedLRUReplacementPolicy.py b/src/mem/ruby/system/WeightedLRUReplacementPolicy.py
new file mode 100644
index 000000000..e7de33496
--- /dev/null
+++ b/src/mem/ruby/system/WeightedLRUReplacementPolicy.py
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: Derek Hower
+#
+
+from m5.params import *
+from m5.proxy import *
+from MemObject import MemObject
+from ReplacementPolicy import ReplacementPolicy
+
+class WeightedLRUReplacementPolicy(ReplacementPolicy):
+ type = "WeightedLRUReplacementPolicy"
+ cxx_class = "WeightedLRUPolicy"
+ cxx_header = "mem/ruby/system/WeightedLRUPolicy.hh"
+ cache = Param.RubyCache("")
diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py
index a530307ee..fc3f32c3d 100644
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -35,13 +35,17 @@ import re
python_class_map = {
"int": "Int",
+ "NodeID": "Int",
"uint32_t" : "UInt32",
"std::string": "String",
"bool": "Bool",
"CacheMemory": "RubyCache",
"WireBuffer": "RubyWireBuffer",
"Sequencer": "RubySequencer",
+ "GPUCoalescer" : "RubyGPUCoalescer",
+ "VIPERCoalescer" : "VIPERCoalescer",
"DirectoryMemory": "RubyDirectoryMemory",
+ "PerfectCacheMemory": "RubyPerfectCacheMemory",
"MemoryControl": "MemoryControl",
"MessageBuffer": "MessageBuffer",
"DMASequencer": "DMASequencer",
@@ -305,7 +309,7 @@ class $c_ident : public AbstractController
void collateStats();
void recordCacheTrace(int cntrl, CacheRecorder* tr);
- Sequencer* getSequencer() const;
+ Sequencer* getCPUSequencer() const;
int functionalWriteBuffers(PacketPtr&);
@@ -527,8 +531,14 @@ $c_ident::$c_ident(const Params *p)
else:
code('m_${{param.ident}} = p->${{param.ident}};')
- if re.compile("sequencer").search(param.ident):
- code('m_${{param.ident}}_ptr->setController(this);')
+ if re.compile("sequencer").search(param.ident) or \
+ param.type_ast.type.c_ident == "GPUCoalescer" or \
+ param.type_ast.type.c_ident == "VIPERCoalescer":
+ code('''
+if (m_${{param.ident}}_ptr != NULL) {
+ m_${{param.ident}}_ptr->setController(this);
+}
+''')
code('''
@@ -670,6 +680,28 @@ $c_ident::init()
assert(param.pointer)
seq_ident = "m_%s_ptr" % param.ident
+ if seq_ident != "NULL":
+ code('''
+Sequencer*
+$c_ident::getCPUSequencer() const
+{
+ if (NULL != $seq_ident && $seq_ident->isCPUSequencer()) {
+ return $seq_ident;
+ } else {
+ return NULL;
+ }
+}
+''')
+ else:
+ code('''
+
+Sequencer*
+$c_ident::getCPUSequencer() const
+{
+ return NULL;
+}
+''')
+
code('''
void
@@ -796,12 +828,6 @@ $c_ident::getMemoryQueue() const
return $memq_ident;
}
-Sequencer*
-$c_ident::getSequencer() const
-{
- return $seq_ident;
-}
-
void
$c_ident::print(ostream& out) const
{