gpu-compute: AMD's baseline GPU model

author: Tony Gutierrez <anthony.gutierrez@amd.com> 2016-01-19 14:28:22 -0500
committer: Tony Gutierrez <anthony.gutierrez@amd.com> 2016-01-19 14:28:22 -0500
commit: 1a7d3f9fcb76a68540dd948f91413533a383bfde (patch)
tree: 867510a147cd095f19499d26b7c02d27de4cae9d /src/mem
parent: 28e353e0403ea379d244a418e8dc8ee0b48187cf (diff)
download: gem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz
53 files changed, 25713 insertions, 75 deletions
diff --git a/src/mem/protocol/GPU_RfO-SQC.sm b/src/mem/protocol/GPU_RfO-SQC.sm
new file mode 100644
index 000000000..1e5f8df74
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO-SQC.sm
@@ -0,0 +1,667 @@
+/*
+ * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
+ : Sequencer* sequencer;
+   CacheMemory * L1cache;
+   int TCC_select_num_bits;
+   Cycles issue_latency := 80;  // time to send data down to TCC
+   Cycles l2_hit_latency := 18;
+
+  MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request";
+  MessageBuffer * responseFromSQC, network="To", virtual_network="3", vnet_type="response";
+  MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";
+
+  MessageBuffer * probeToSQC, network="From", virtual_network="1", vnet_type="request";
+  MessageBuffer * responseToSQC, network="From", virtual_network="3", vnet_type="response";
+
+  MessageBuffer * mandatoryQueue;
+{
+  state_declaration(State, desc="SQC Cache States", default="SQC_State_I") {
+    I, AccessPermission:Invalid, desc="Invalid";
+    S, AccessPermission:Read_Only, desc="Shared";
+
+    I_S, AccessPermission:Busy, desc="Invalid, issued RdBlkS, have not seen response yet";
+    S_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack";
+    I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCCdir for canceled WB";
+  }
+
+  enumeration(Event, desc="SQC Events") {
+    // Core initiated
+    Fetch,          desc="Fetch";
+
+    //TCC initiated
+    TCC_AckS,        desc="TCC Ack to Core Request";
+    TCC_AckWB,       desc="TCC Ack for WB";
+    TCC_NackWB,       desc="TCC Nack for WB";
+
+    // Mem sys initiated
+    Repl,           desc="Replacing block from cache";
+
+    // Probe Events
+    PrbInvData,         desc="probe, return M data";
+    PrbInv,             desc="probe, no need for data";
+    PrbShrData,         desc="probe downgrade, return data";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead,    desc="Read the data array";
+    DataArrayWrite,   desc="Write the data array";
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+  }
+
+
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState,           desc="cache state";
+    bool Dirty,                 desc="Is the data dirty (diff than memory)?";
+    DataBlock DataBlk,          desc="data for the block";
+    bool FromL2, default="false", desc="block just moved from L2";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,             desc="Transient state";
+    DataBlock DataBlk,       desc="data for the block, required for concurrent writebacks";
+    bool Dirty,              desc="Is the data dirty (different than memory)?";
+    int NumPendingMsgs,      desc="Number of acks/data messages that this processor is waiting for";
+    bool Shared,             desc="Victim hit by shared probe";
+   }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<SQC_TBE>", constructor="m_number_of_TBEs";
+  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  // Internal functions
+  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+    Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
+    return cache_entry;
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return tbe.DataBlk;
+    } else {
+      return getCacheEntry(addr).DataBlk;
+    }
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if(is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+      cache_entry.CacheState := state;
+    }
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return SQC_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return SQC_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(SQC_State_to_permission(state));
+    }
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+        L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+        L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+        L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+        L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+  // Out Ports
+
+  out_port(requestNetwork_out, CPURequestMsg, requestFromSQC);
+  out_port(responseNetwork_out, ResponseMsg, responseFromSQC);
+  out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
+
+  // In Ports
+
+  in_port(probeNetwork_in, TDProbeRequestMsg, probeToSQC) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == ProbeRequestType:PrbInv) {
+          if (in_msg.ReturnData) {
+            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+          assert(in_msg.ReturnData);
+          trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+  in_port(responseToSQC_in, ResponseMsg, responseToSQC) {
+    if (responseToSQC_in.isReady(clockEdge())) {
+      peek(responseToSQC_in, ResponseMsg, block_on="addr") {
+
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == CoherenceResponseType:TDSysResp) {
+          if (in_msg.State == CoherenceState:Shared) {
+            trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe);
+          } else {
+            error("SQC should not receive TDSysResp other than CoherenceState:Shared");
+          }
+        } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
+          trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) {
+          trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+
+  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+    if (mandatoryQueue_in.isReady(clockEdge())) {
+      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+        Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+        TBE tbe := TBEs.lookup(in_msg.LineAddress);
+
+        assert(in_msg.Type == RubyRequestType:IFETCH);
+        if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
+          trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe);
+        } else {
+          Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
+          trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+        }
+      }
+    }
+  }
+
+  // Actions
+
+  action(ic_invCache, "ic", desc="invalidate cache") {
+    if(is_valid(cache_entry)) {
+      L1cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkS;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(vc_victim, "vc", desc="Victimize E/S Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicClean;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:S) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(a_allocate, "a", desc="allocate block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L1cache.allocate(address, new Entry));
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    assert(is_valid(cache_entry));
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+    tbe.DataBlk := cache_entry.DataBlk;  // Data only used for WBs
+    tbe.Dirty := cache_entry.Dirty;
+    tbe.Shared := false;
+  }
+
+  action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+    mandatoryQueue_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+    responseToSQC_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  action(l_loadDone, "l", desc="local load done") {
+    assert(is_valid(cache_entry));
+    sequencer.readCallback(address, cache_entry.DataBlk,
+                           false, MachineType:L1Cache);
+    APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
+  }
+
+  action(xl_loadDone, "xl", desc="remote load done") {
+    peek(responseToSQC_in, ResponseMsg) {
+      assert(is_valid(cache_entry));
+      sequencer.readCallback(address,
+                             cache_entry.DataBlk,
+                             false,
+                             machineIDToMachineType(in_msg.Sender),
+                             in_msg.InitialRequestTime,
+                             in_msg.ForwardRequestTime,
+                             in_msg.ProbeRequestStartTime);
+      APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
+    }
+  }
+
+  action(w_writeCache, "w", desc="write data to cache") {
+    peek(responseToSQC_in, ResponseMsg) {
+      assert(is_valid(cache_entry));
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+    peek(responseToSQC_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:StaleNotif;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                TCC_select_low_bit, TCC_select_num_bits));
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(wb_data, "wb", desc="write back data") {
+    peek(responseToSQC_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:CPUData;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                TCC_select_low_bit, TCC_select_num_bits));
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.Dirty := tbe.Dirty;
+        if (tbe.Shared) {
+          out_msg.NbReqShared := true;
+        } else {
+          out_msg.NbReqShared := false;
+        }
+        out_msg.State := CoherenceState:Shared; // faux info
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      // will this always be ok? probably not for multisocket
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      // will this always be ok? probably not for multisocket
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.Dirty := false;
+      out_msg.Ntsl := true;
+      out_msg.Hit := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      // will this always be ok? probably not for multisocket
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.Dirty := false;  // only true if sending back data i think
+      out_msg.Hit := false;
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      assert(is_valid(cache_entry) || is_valid(tbe));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      // will this always be ok? probably not for multisocket
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.DataBlk := getDataBlock(address);
+      if (is_valid(tbe)) {
+        out_msg.Dirty := tbe.Dirty;
+      } else {
+        out_msg.Dirty := cache_entry.Dirty;
+      }
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      assert(is_valid(cache_entry) || is_valid(tbe));
+      assert(is_valid(cache_entry));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      // will this always be ok? probably not for multisocket
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.DataBlk := getDataBlock(address);
+      if (is_valid(tbe)) {
+        out_msg.Dirty := tbe.Dirty;
+      } else {
+        out_msg.Dirty := cache_entry.Dirty;
+      }
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
+    assert(is_valid(tbe));
+    tbe.Shared := true;
+  }
+
+  action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+    enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
+    probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
+    mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  // Transitions
+
+  // transitions from base
+  transition(I, Fetch, I_S) {TagArrayRead, TagArrayWrite} {
+    a_allocate;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  // simple hit transitions
+  transition(S, Fetch) {TagArrayRead, DataArrayRead} {
+    l_loadDone;
+    p_popMandatoryQueue;
+  }
+
+  // recycles from transients
+  transition({I_S, S_I, I_C}, {Fetch, Repl}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition(S, Repl, S_I) {TagArrayRead} {
+    t_allocateTBE;
+    vc_victim;
+    ic_invCache;
+  }
+
+  // TCC event
+  transition(I_S, TCC_AckS, S) {DataArrayRead, DataArrayWrite} {
+    w_writeCache;
+    xl_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(S_I, TCC_NackWB, I){TagArrayWrite} {
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(S_I, TCC_AckWB, I) {TagArrayWrite} {
+    wb_data;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(I_C, TCC_AckWB, I){TagArrayWrite} {
+    ss_sendStaleNotification;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(I_C, TCC_NackWB, I) {TagArrayWrite} {
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  // Probe transitions
+  transition({S, I}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
+    pd_sendProbeResponseData;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbInvData, I_C) {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition({S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition({S}, PrbShrData, S) {DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({I, I_C}, PrbShrData) {TagArrayRead} {
+    prm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbInv, I_C){
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(I_S, {PrbInv, PrbInvData}) {} {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    a_allocate;  // but make sure there is room for incoming data when it arrives
+    pp_popProbeQueue;
+  }
+
+  transition(I_S, PrbShrData) {} {
+    prm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  transition(S_I, PrbInvData, I_C) {TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(S_I, PrbInv, I_C) {TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(S_I, PrbShrData) {DataArrayRead} {
+    pd_sendProbeResponseData;
+    sf_setSharedFlip;
+    pp_popProbeQueue;
+  }
+}
diff --git a/src/mem/protocol/GPU_RfO-TCC.sm b/src/mem/protocol/GPU_RfO-TCC.sm
new file mode 100644
index 000000000..cfddb3f00
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO-TCC.sm
@@ -0,0 +1,1199 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:TCC, "TCC Cache")
+ : CacheMemory * L2cache;
+   WireBuffer * w_reqToTCCDir;
+   WireBuffer * w_respToTCCDir;
+   WireBuffer * w_TCCUnblockToTCCDir;
+   WireBuffer * w_reqToTCC;
+   WireBuffer * w_probeToTCC;
+   WireBuffer * w_respToTCC;
+   int TCC_select_num_bits;
+   Cycles l2_request_latency := 1;
+   Cycles l2_response_latency := 20;
+
+  // To the general response network
+  MessageBuffer * responseFromTCC, network="To", virtual_network="3", vnet_type="response";
+
+  // From the general response network
+  MessageBuffer * responseToTCC, network="From", virtual_network="3", vnet_type="response";
+
+{
+  // EVENTS
+  enumeration(Event, desc="TCC Events") {
+    // Requests coming from the Cores
+    RdBlk,                  desc="CPU RdBlk event";
+    RdBlkM,                 desc="CPU RdBlkM event";
+    RdBlkS,                 desc="CPU RdBlkS event";
+    CtoD,                   desc="Change to Dirty request";
+    WrVicBlk,               desc="L1 Victim (dirty)";
+    WrVicBlkShared,               desc="L1 Victim (dirty)";
+    ClVicBlk,               desc="L1 Victim (clean)";
+    ClVicBlkShared,               desc="L1 Victim (clean)";
+
+    CPUData,                      desc="WB data from CPU";
+    CPUDataShared,                desc="WB data from CPU, NBReqShared 1";
+    StaleWB,                desc="Stale WB, No data";
+
+    L2_Repl,             desc="L2 Replacement";
+
+    // Probes
+    PrbInvData,         desc="Invalidating probe, return dirty data";
+    PrbInv,             desc="Invalidating probe, no need to return data";
+    PrbShrData,         desc="Downgrading probe, return data";
+
+    // Coming from Memory Controller
+    WBAck,                     desc="ack from memory";
+
+    CancelWB,                   desc="Cancel WB from L2";
+  }
+
+  // STATES
+  state_declaration(State, desc="TCC State", default="TCC_State_I") {
+    M, AccessPermission:Read_Write, desc="Modified";  // No other cache has copy, memory stale
+    O, AccessPermission:Read_Only, desc="Owned";     // Correct most recent copy, others may exist in S
+    E, AccessPermission:Read_Write, desc="Exclusive"; // Correct, most recent, and only copy (and == Memory)
+    S, AccessPermission:Read_Only, desc="Shared";    // Correct, most recent. If no one in O, then == Memory
+    I, AccessPermission:Invalid, desc="Invalid";
+
+    I_M, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data";
+    I_O, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data";
+    I_E, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data";
+    I_S, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data";
+    S_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to M";
+    S_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O";
+    S_E, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to E";
+    S_S, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to S";
+    E_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O";
+    E_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O";
+    E_E, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O";
+    E_S, AccessPermission:Busy, desc="Shared, received WrVicBlk, sent Ack, waiting for Data";
+    O_M, AccessPermission:Busy, desc="...";
+    O_O, AccessPermission:Busy, desc="...";
+    O_E, AccessPermission:Busy, desc="...";
+    M_M, AccessPermission:Busy, desc="...";
+    M_O, AccessPermission:Busy, desc="...";
+    M_E, AccessPermission:Busy, desc="...";
+    M_S, AccessPermission:Busy, desc="...";
+    D_I, AccessPermission:Invalid,  desc="drop WB data on the floor when receive";
+    MOD_I, AccessPermission:Busy, desc="drop WB data on the floor, waiting for WBAck from Mem";
+    MO_I, AccessPermission:Busy, desc="M or O, received L2_Repl, waiting for WBAck from Mem";
+    ES_I, AccessPermission:Busy, desc="E or S, received L2_Repl, waiting for WBAck from Mem";
+    I_C, AccessPermission:Invalid, desc="sent cancel, just waiting to receive mem wb ack so nothing gets confused";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead,    desc="Read the data array";
+    DataArrayWrite,   desc="Write the data array";
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+  }
+
+
+  // STRUCTURES
+
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState,           desc="cache state";
+    bool Dirty,                 desc="Is the data dirty (diff from memory?)";
+    DataBlock DataBlk,          desc="Data for the block";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,     desc="Transient state";
+    DataBlock DataBlk,  desc="data for the block";
+    bool Dirty,         desc="Is the data dirty?";
+    bool Shared,        desc="Victim hit by shared probe";
+    MachineID From,     desc="Waiting for writeback from...";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs";
+  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+
+
+  // FUNCTION DEFINITIONS
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", L2cache.lookup(addr));
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    return getCacheEntry(addr).DataBlk;
+  }
+
+  bool presentOrAvail(Addr addr) {
+    return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+        tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+        cache_entry.CacheState := state;
+    }
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return TCC_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return TCC_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(TCC_State_to_permission(state));
+    }
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+        L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+        L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+        L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+        L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+
+
+  // OUT PORTS
+  out_port(w_requestNetwork_out, CPURequestMsg, w_reqToTCCDir);
+  out_port(w_TCCResp_out, ResponseMsg, w_respToTCCDir);
+  out_port(responseNetwork_out, ResponseMsg, responseFromTCC);
+  out_port(w_unblockNetwork_out, UnblockMsg, w_TCCUnblockToTCCDir);
+
+  // IN PORTS
+  in_port(TDResponse_in, ResponseMsg, w_respToTCC) {
+    if (TDResponse_in.isReady(clockEdge())) {
+      peek(TDResponse_in, ResponseMsg) {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
+          trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
+        }
+        else {
+          DPRINTF(RubySlicc, "%s\n", in_msg);
+          error("Error on TDResponse Type");
+        }
+      }
+    }
+  }
+
+  // Response Network
+  in_port(responseNetwork_in, ResponseMsg, responseToTCC) {
+    if (responseNetwork_in.isReady(clockEdge())) {
+      peek(responseNetwork_in, ResponseMsg) {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:CPUData) {
+          if (in_msg.NbReqShared) {
+            trigger(Event:CPUDataShared, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:CPUData, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
+            trigger(Event:StaleWB, in_msg.addr, cache_entry, tbe);
+        } else {
+          DPRINTF(RubySlicc, "%s\n", in_msg);
+          error("Error on TDResponse Type");
+        }
+      }
+    }
+  }
+
+  // probe network
+  in_port(probeNetwork_in, TDProbeRequestMsg, w_probeToTCC) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, TDProbeRequestMsg) {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        if (in_msg.Type == ProbeRequestType:PrbInv) {
+          if (in_msg.ReturnData) {
+            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+          if (in_msg.ReturnData) {
+            trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+          } else {
+            error("Don't think I should get any of these");
+          }
+        }
+      }
+    }
+  }
+
+  // Request Network
+  in_port(requestNetwork_in, CPURequestMsg, w_reqToTCC) {
+    if (requestNetwork_in.isReady(clockEdge())) {
+      peek(requestNetwork_in, CPURequestMsg) {
+        assert(in_msg.Destination.isElement(machineID));
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        if (in_msg.Type == CoherenceRequestType:RdBlk) {
+          trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+          trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+          trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+          if (presentOrAvail(in_msg.addr)) {
+            if (in_msg.Shared) {
+              trigger(Event:ClVicBlkShared, in_msg.addr, cache_entry, tbe);
+            } else {
+              trigger(Event:ClVicBlk, in_msg.addr, cache_entry, tbe);
+            }
+          } else {
+            Addr victim :=  L2cache.cacheProbe(in_msg.addr);
+            trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+          if (presentOrAvail(in_msg.addr)) {
+            if (in_msg.Shared) {
+              trigger(Event:WrVicBlkShared, in_msg.addr, cache_entry, tbe);
+            } else {
+              trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
+            }
+          } else {
+            Addr victim := L2cache.cacheProbe(in_msg.addr);
+            trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        } else {
+            requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+        }
+      }
+    }
+  }
+
+  // BEGIN ACTIONS
+
+  action(i_invL2, "i", desc="invalidate TCC cache block") {
+    if (is_valid(cache_entry)) {
+        L2cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  action(rm_sendResponseM, "rm", desc="send Modified response") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := cache_entry.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.Dirty := cache_entry.Dirty;
+        out_msg.State := CoherenceState:Modified;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(rs_sendResponseS, "rs", desc="send Shared response") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := cache_entry.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.Dirty := cache_entry.Dirty;
+        out_msg.State := CoherenceState:Shared;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+
+  action(r_requestToTD, "r", desc="Miss in L2, pass on") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
+        out_msg.addr := address;
+        out_msg.Type := in_msg.Type;
+        out_msg.Requestor := in_msg.Requestor;
+        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                                TCC_select_low_bit, TCC_select_num_bits));
+        out_msg.Shared := false; // unneeded for this request
+        out_msg.MessageSize := in_msg.MessageSize;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+    if (is_valid(cache_entry)) {
+      tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
+      tbe.Dirty := cache_entry.Dirty;
+    }
+    tbe.From := machineID;
+  }
+
+  action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(vc_vicClean, "vc", desc="Victimize Clean L2 data") {
+    enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:VicClean;
+      out_msg.Requestor := machineID;
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  action(vd_vicDirty, "vd", desc="Victimize dirty L2 data") {
+    enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:VicDirty;
+      out_msg.Requestor := machineID;
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysWBAck;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+      }
+    }
+  }
+
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      // will this always be ok? probably not for multisocket
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(ph_sendProbeResponseHit, "ph", desc="send probe ack, no data") {
+    enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      // will this always be ok? probably not for multisocket
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.Dirty := false;
+      out_msg.Hit := true;
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pm_sendProbeResponseMiss, "pm", desc="send probe ack, no data") {
+    enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      // will this always be ok? probably not for multisocket
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+    enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      // will this always be ok? probably not for multisocket
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.DataBlk := cache_entry.DataBlk;
+      //assert(cache_entry.Dirty); Not needed in TCC where TCC can supply clean data
+      out_msg.Dirty := cache_entry.Dirty;
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") {
+    enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.DataBlk := tbe.DataBlk;
+      //assert(tbe.Dirty);
+      out_msg.Dirty := tbe.Dirty;
+      out_msg.Hit := true;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.State := CoherenceState:NA;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(mc_cancelMemWriteback, "mc", desc="send writeback cancel to memory") {
+    enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:WrCancel;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+    }
+  }
+
+  action(a_allocateBlock, "a", desc="allocate TCC block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L2cache.allocate(address, new Entry));
+    }
+  }
+
+  action(d_writeData, "d", desc="write data to TCC") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (in_msg.Dirty) {
+        cache_entry.Dirty := in_msg.Dirty;
+      }
+      cache_entry.DataBlk := in_msg.DataBlk;
+      DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+    }
+  }
+
+  action(rd_copyDataFromRequest, "rd", desc="write data to TCC") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := true;
+    }
+  }
+
+  action(f_setFrom, "f", desc="set who WB is expected to come from") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      tbe.From := in_msg.Requestor;
+    }
+  }
+
+  action(rf_resetFrom, "rf", desc="reset From") {
+    tbe.From := machineID;
+  }
+
+  action(wb_data, "wb", desc="write back data") {
+    enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUData;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.Dirty := tbe.Dirty;
+      if (tbe.Shared) {
+        out_msg.NbReqShared := true;
+      } else {
+        out_msg.NbReqShared := false;
+      }
+      out_msg.State := CoherenceState:Shared; // faux info
+      out_msg.MessageSize := MessageSizeType:Writeback_Data;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(wt_writeDataToTBE, "wt", desc="write WB data to TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      tbe.DataBlk := in_msg.DataBlk;
+      tbe.Dirty := in_msg.Dirty;
+    }
+  }
+
+  action(uo_sendUnblockOwner, "uo", desc="state changed to E, M, or O, unblock") {
+    enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      out_msg.currentOwner := true;
+      out_msg.valid := true;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(us_sendUnblockSharer, "us", desc="state changed to S , unblock") {
+    enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      out_msg.currentOwner := false;
+      out_msg.valid := true;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(un_sendUnblockNotValid, "un", desc="state changed toI, unblock") {
+    enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      out_msg.currentOwner := false;
+      out_msg.valid := false;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+    L2cache.setMRU(address);
+  }
+
+  action(p_popRequestQueue, "p", desc="pop request queue") {
+    requestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="pop response queue") {
+    responseNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pn_popTDResponseQueue, "pn", desc="pop TD response queue") {
+    TDResponse_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  action(zz_recycleRequestQueue, "\z", desc="recycle request queue") {
+    requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+
+  // END ACTIONS
+
+  // BEGIN TRANSITIONS
+
+  // transitions from base
+
+  transition({I, I_C}, {RdBlk, RdBlkS, RdBlkM, CtoD}){TagArrayRead} {
+    // TCCdir already knows that the block is not here. This is to allocate and get the block.
+    r_requestToTD;
+    p_popRequestQueue;
+  }
+
+// check
+  transition({M, O}, RdBlk, O){TagArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    // detect 2nd chancing
+    p_popRequestQueue;
+  }
+
+//check
+  transition({E, S}, RdBlk, S){TagArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    // detect 2nd chancing
+    p_popRequestQueue;
+  }
+
+// check
+  transition({M, O}, RdBlkS, O){TagArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    // detect 2nd chance sharing
+    p_popRequestQueue;
+  }
+
+//check
+  transition({E, S}, RdBlkS, S){TagArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    // detect 2nd chance sharing
+    p_popRequestQueue;
+  }
+
+// check
+  transition(M, RdBlkM, I){TagArrayRead, TagArrayWrite} {
+    rm_sendResponseM;
+    i_invL2;
+    p_popRequestQueue;
+  }
+
+  //check
+  transition(E, RdBlkM, I){TagArrayRead, TagArrayWrite} {
+    rm_sendResponseM;
+    i_invL2;
+    p_popRequestQueue;
+  }
+
+// check
+  transition({I}, WrVicBlk, I_M){TagArrayRead} {
+    a_allocateBlock;
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(I_C, {WrVicBlk, WrVicBlkShared, ClVicBlk, ClVicBlkShared}) {
+    zz_recycleRequestQueue;
+  }
+
+//check
+  transition({I}, WrVicBlkShared, I_O) {TagArrayRead}{
+    a_allocateBlock;
+    t_allocateTBE;
+    f_setFrom;
+//    rd_copyDataFromRequest;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+//check
+  transition(S, WrVicBlkShared, S_O){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+// a stale writeback
+ transition(S, WrVicBlk, S_S){TagArrayRead} {
+   t_allocateTBE;
+   f_setFrom;
+   w_sendResponseWBAck;
+   p_popRequestQueue;
+ }
+
+// a stale writeback
+  transition(E, WrVicBlk, E_E){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+// a stale writeback
+  transition(E, WrVicBlkShared, E_E){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+// a stale writeback
+  transition(O, WrVicBlk, O_O){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+// a stale writeback
+ transition(O, WrVicBlkShared, O_O){TagArrayRead} {
+   t_allocateTBE;
+   f_setFrom;
+   w_sendResponseWBAck;
+   p_popRequestQueue;
+ }
+
+// a stale writeback
+  transition(M, WrVicBlk, M_M){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+// a stale writeback
+  transition(M, WrVicBlkShared, M_O){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+//check
+  transition({I}, ClVicBlk, I_E){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    a_allocateBlock;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition({I}, ClVicBlkShared, I_S){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    a_allocateBlock;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+//check
+  transition(S, ClVicBlkShared, S_S){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+// a stale writeback
+  transition(E, ClVicBlk, E_E){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+// a stale writeback
+  transition(E, ClVicBlkShared, E_S){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+// a stale writeback
+ transition(O, ClVicBlk, O_O){TagArrayRead} {
+   t_allocateTBE;
+   f_setFrom;
+   w_sendResponseWBAck;
+   p_popRequestQueue;
+ }
+
+// check. Original L3 ahd it going from O to O_S. Something can go from O to S only on writeback.
+  transition(O, ClVicBlkShared, O_O){TagArrayRead} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+// a stale writeback
+ transition(M, ClVicBlk, M_E){TagArrayRead} {
+   t_allocateTBE;
+   f_setFrom;
+   w_sendResponseWBAck;
+   p_popRequestQueue;
+ }
+
+// a stale writeback
+ transition(M, ClVicBlkShared, M_S){TagArrayRead} {
+   t_allocateTBE;
+   f_setFrom;
+   w_sendResponseWBAck;
+   p_popRequestQueue;
+ }
+
+
+  transition({MO_I}, {RdBlk, RdBlkS, RdBlkM, CtoD}) {
+    a_allocateBlock;
+    t_allocateTBE;
+    f_setFrom;
+    r_requestToTD;
+    p_popRequestQueue;
+  }
+
+  transition(MO_I, {WrVicBlkShared, WrVicBlk, ClVicBlk, ClVicBlkShared}, MOD_I) {
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(I_M, CPUData, M){TagArrayWrite} {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M, CPUDataShared, O){TagArrayWrite, DataArrayWrite} {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite}  {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E, CPUData, E){TagArrayWrite, DataArrayWrite}  {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E, CPUDataShared, S){TagArrayWrite, DataArrayWrite}  {
+    us_sendUnblockSharer;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_S, {CPUData, CPUDataShared}, S){TagArrayWrite, DataArrayWrite}  {
+    us_sendUnblockSharer;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(S_M, CPUDataShared, O){TagArrayWrite, DataArrayWrite}  {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(S_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite}  {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(S_E, CPUDataShared, S){TagArrayWrite, DataArrayWrite}  {
+    us_sendUnblockSharer;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(S_S, {CPUData, CPUDataShared}, S){TagArrayWrite, DataArrayWrite}  {
+    us_sendUnblockSharer;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(O_E, CPUDataShared, O){TagArrayWrite, DataArrayWrite}  {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(O_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite}  {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition({D_I}, {CPUData, CPUDataShared}, I){TagArrayWrite}  {
+    un_sendUnblockNotValid;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(MOD_I, {CPUData, CPUDataShared}, MO_I) {
+    un_sendUnblockNotValid;
+    rf_resetFrom;
+    pr_popResponseQueue;
+  }
+
+  transition({O,S,I}, CPUData) {
+    pr_popResponseQueue;
+  }
+
+  transition({M, O}, L2_Repl, MO_I){TagArrayRead, DataArrayRead} {
+    t_allocateTBE;
+    vd_vicDirty;
+    i_invL2;
+  }
+
+  transition({E, S,}, L2_Repl, ES_I){TagArrayRead, DataArrayRead} {
+    t_allocateTBE;
+    vc_vicClean;
+    i_invL2;
+  }
+
+  transition({I_M, I_O, S_M, S_O, E_M, E_O}, L2_Repl) {
+    zz_recycleRequestQueue;
+  }
+
+  transition({O_M, O_O, O_E, M_M, M_O, M_E, M_S}, L2_Repl) {
+    zz_recycleRequestQueue;
+  }
+
+  transition({I_E, I_S, S_E, S_S, E_E, E_S}, L2_Repl) {
+    zz_recycleRequestQueue;
+  }
+
+  transition({M, O}, PrbInvData, I){TagArrayRead, TagArrayWrite} {
+    pd_sendProbeResponseData;
+    i_invL2;
+    pp_popProbeQueue;
+  }
+
+  transition(I, PrbInvData){TagArrayRead, TagArrayWrite}  {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition({E, S}, PrbInvData, I){TagArrayRead, TagArrayWrite}  {
+    pd_sendProbeResponseData;
+    i_invL2;
+    pp_popProbeQueue;
+  }
+
+  transition({M, O, E, S, I}, PrbInv, I){TagArrayRead, TagArrayWrite}  {
+    pi_sendProbeResponseInv;
+    i_invL2; // nothing will happen in I
+    pp_popProbeQueue;
+  }
+
+  transition({M, O}, PrbShrData, O){TagArrayRead, TagArrayWrite}  {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({E, S}, PrbShrData, S){TagArrayRead, TagArrayWrite}  {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition(I, PrbShrData){TagArrayRead}  {
+    pm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInvData, I_C) {
+    pdt_sendProbeResponseDataFromTBE;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, PrbInvData, I_C) {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition({ES_I,MO_I}, PrbInv, I_C) {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition({ES_I, MO_I}, PrbShrData) {
+    pdt_sendProbeResponseDataFromTBE;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, {PrbInvData, PrbInv}) {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbShrData) {
+    pm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  transition(MOD_I, WBAck, D_I) {
+    pn_popTDResponseQueue;
+  }
+
+  transition(MO_I, WBAck, I){TagArrayWrite} {
+    dt_deallocateTBE;
+    pn_popTDResponseQueue;
+  }
+
+  // this can only be a spurious CPUData from a shared block.
+  transition(MO_I, CPUData) {
+    pr_popResponseQueue;
+  }
+
+  transition(ES_I, WBAck, I){TagArrayWrite} {
+    dt_deallocateTBE;
+    pn_popTDResponseQueue;
+  }
+
+  transition(I_C, {WBAck}, I){TagArrayWrite} {
+    dt_deallocateTBE;
+    pn_popTDResponseQueue;
+  }
+
+  transition({I_M, I_O, I_E, I_S}, StaleWB, I){TagArrayWrite} {
+    un_sendUnblockNotValid;
+    dt_deallocateTBE;
+    i_invL2;
+    pr_popResponseQueue;
+  }
+
+  transition({S_S, S_O, S_M, S_E}, StaleWB, S){TagArrayWrite} {
+    us_sendUnblockSharer;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition({E_M, E_O, E_E, E_S}, StaleWB, E){TagArrayWrite} {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition({O_M, O_O, O_E}, StaleWB, O){TagArrayWrite} {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition({M_M, M_O, M_E, M_S}, StaleWB, M){TagArrayWrite} {
+    uo_sendUnblockOwner;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(D_I, StaleWB, I) {TagArrayWrite}{
+    un_sendUnblockNotValid;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(MOD_I, StaleWB, MO_I) {
+    un_sendUnblockNotValid;
+    rf_resetFrom;
+    pr_popResponseQueue;
+  }
+
+}
diff --git a/src/mem/protocol/GPU_RfO-TCCdir.sm b/src/mem/protocol/GPU_RfO-TCCdir.sm
new file mode 100644
index 000000000..8f58d6ebb
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO-TCCdir.sm
@@ -0,0 +1,2672 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Mithuna Thottethodi
+ */
+
+machine(MachineType:TCCdir, "AMD read-for-ownership directory for TCC (aka GPU L2)")
+:  CacheMemory * directory;
+  // Convention: wire buffers are prefixed with "w_" for clarity
+  WireBuffer * w_reqToTCCDir;
+  WireBuffer * w_respToTCCDir;
+  WireBuffer * w_TCCUnblockToTCCDir;
+  WireBuffer * w_reqToTCC;
+  WireBuffer * w_probeToTCC;
+  WireBuffer * w_respToTCC;
+  int TCC_select_num_bits;
+  Cycles response_latency := 5;
+  Cycles directory_latency := 6;
+  Cycles issue_latency := 120;
+
+  // From the TCPs or SQCs
+  MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request";
+  MessageBuffer * responseFromTCP, network="From", virtual_network="3", vnet_type="response";
+  MessageBuffer * unblockFromTCP, network="From", virtual_network="5", vnet_type="unblock";
+
+  // To the Cores. TCC deals only with TCPs/SQCs. CP cores do not communicate directly with TCC.
+  MessageBuffer * probeToCore, network="To", virtual_network="1", vnet_type="request";
+  MessageBuffer * responseToCore, network="To", virtual_network="3", vnet_type="response";
+
+  // From the NB
+  MessageBuffer * probeFromNB, network="From", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseFromNB, network="From", virtual_network="2", vnet_type="response";
+  // To the NB
+  MessageBuffer * requestToNB, network="To", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseToNB, network="To", virtual_network="2", vnet_type="response";
+  MessageBuffer * unblockToNB, network="To", virtual_network="4", vnet_type="unblock";
+
+  MessageBuffer * triggerQueue, random="false";
+{
+  // STATES
+  state_declaration(State, desc="Directory states", default="TCCdir_State_I") {
+    // Base states
+    I, AccessPermission:Invalid, desc="Invalid";
+    S, AccessPermission:Invalid, desc="Shared";
+    E, AccessPermission:Invalid, desc="Shared";
+    O, AccessPermission:Invalid, desc="Owner";
+    M, AccessPermission:Invalid, desc="Modified";
+
+    CP_I, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to invalid";
+    B_I, AccessPermission:Invalid, desc="Blocked, need not send data after acks are in, going to invalid";
+    CP_O, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to owned";
+    CP_S, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to shared";
+    CP_OM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to O_M";
+    CP_SM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to S_M";
+    CP_ISM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to I_M";
+    CP_IOM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to I_M";
+    CP_OSIW, AccessPermission:Invalid, desc="Blocked, must send data after acks+CancelWB are in, going to I_C";
+
+
+    // Transient states and busy states used for handling side (TCC-facing) interactions
+    BW_S, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
+    BW_E, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
+    BW_O, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
+    BW_M, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
+
+    // Transient states and busy states used for handling upward (TCP-facing) interactions
+    I_M, AccessPermission:Invalid, desc="Invalid, issued RdBlkM, have not seen response yet";
+    I_ES, AccessPermission:Invalid, desc="Invalid, issued RdBlk, have not seen response yet";
+    I_S, AccessPermission:Invalid, desc="Invalid, issued RdBlkS, have not seen response yet";
+    BBS_S, AccessPermission:Invalid, desc="Blocked, going from S to S";
+    BBO_O, AccessPermission:Invalid, desc="Blocked, going from O to O";
+    BBM_M, AccessPermission:Invalid, desc="Blocked, going from M to M, waiting for data to forward";
+    BBM_O, AccessPermission:Invalid, desc="Blocked, going from M to O, waiting for data to forward";
+    BB_M, AccessPermission:Invalid, desc="Blocked, going from M to M, waiting for unblock";
+    BB_O, AccessPermission:Invalid, desc="Blocked, going from M to O, waiting for unblock";
+    BB_OO, AccessPermission:Invalid, desc="Blocked, going from O to O (adding sharers), waiting for unblock";
+    BB_S, AccessPermission:Invalid, desc="Blocked, going to S, waiting for (possible multiple) unblock(s)";
+    BBS_M, AccessPermission:Invalid, desc="Blocked, going from S or O to M";
+    BBO_M, AccessPermission:Invalid, desc="Blocked, going from S or O to M";
+    BBS_UM, AccessPermission:Invalid, desc="Blocked, going from S or O to M via upgrade";
+    BBO_UM, AccessPermission:Invalid, desc="Blocked, going from S or O to M via upgrade";
+    S_M, AccessPermission:Invalid, desc="Shared, issued CtoD, have not seen response yet";
+    O_M, AccessPermission:Invalid, desc="Shared, issued CtoD, have not seen response yet";
+
+    //
+    BBB_S, AccessPermission:Invalid, desc="Blocked, going to S after core unblock";
+    BBB_M, AccessPermission:Invalid, desc="Blocked, going to M after core unblock";
+    BBB_E, AccessPermission:Invalid, desc="Blocked, going to E after core unblock";
+
+    VES_I, AccessPermission:Invalid, desc="TCC replacement, waiting for clean WB ack";
+    VM_I, AccessPermission:Invalid, desc="TCC replacement, waiting for dirty WB ack";
+    VO_I, AccessPermission:Invalid, desc="TCC replacement, waiting for dirty WB ack";
+    VO_S, AccessPermission:Invalid, desc="TCC owner replacement, waiting for dirty WB ack";
+
+    ES_I, AccessPermission:Invalid, desc="L1 replacement, waiting for clean WB ack";
+    MO_I, AccessPermission:Invalid, desc="L1 replacement, waiting for dirty WB ack";
+
+    I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from NB for canceled WB";
+    I_W, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from NB; canceled WB raced with directory invalidation";
+
+    // Recall States
+    BRWD_I, AccessPermission:Invalid, desc="Recalling, waiting for WBAck and Probe Data responses";
+    BRW_I, AccessPermission:Read_Write, desc="Recalling, waiting for WBAck";
+    BRD_I, AccessPermission:Invalid, desc="Recalling, waiting for Probe Data responses";
+
+  }
+
+ enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead,    desc="Read the data array";
+    DataArrayWrite,   desc="Write the data array";
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+  }
+
+
+
+  // EVENTS
+  enumeration(Event, desc="TCC Directory Events") {
+    // Upward facing events (TCCdir w.r.t. TCP/SQC and TCC behaves like NBdir behaves with TCP/SQC and L3
+
+    // Directory Recall
+    Recall,              desc="directory cache is full";
+    // CPU requests
+    CPUWrite,           desc="Initial req from core, sent to TCC";
+    NoCPUWrite,           desc="Initial req from core, but non-exclusive clean data; can be discarded";
+    CPUWriteCancel,           desc="Initial req from core, sent to TCC";
+
+    // Requests from the TCPs
+    RdBlk,                  desc="RdBlk event";
+    RdBlkM,                 desc="RdBlkM event";
+    RdBlkS,                 desc="RdBlkS event";
+    CtoD,                   desc="Change to Dirty request";
+
+    // TCC writebacks
+    VicDirty,           desc="...";
+    VicDirtyLast,           desc="...";
+    VicClean,           desc="...";
+    NoVic,           desc="...";
+    StaleVic,           desc="...";
+    CancelWB,           desc="TCC got invalidating probe, canceled WB";
+
+    // Probe Responses from TCP/SQCs
+    CPUPrbResp,     desc="Probe response from TCP/SQC";
+    TCCPrbResp,     desc="Probe response from TCC";
+
+    ProbeAcksComplete,	desc="All acks received";
+    ProbeAcksCompleteReissue,	desc="All acks received, changing CtoD to reissue";
+
+    CoreUnblock,		desc="unblock from TCP/SQC";
+    LastCoreUnblock,		desc="Last unblock from TCP/SQC";
+    TCCUnblock,			desc="unblock from TCC (current owner)";
+    TCCUnblock_Sharer,  desc="unblock from TCC (a sharer, not owner)";
+    TCCUnblock_NotValid,desc="unblock from TCC (not valid...caused by stale writebacks)";
+
+    // Downward facing events
+
+    // NB initiated
+    NB_AckS,        desc="NB Ack to TCC Request";
+    NB_AckE,        desc="NB Ack to TCC Request";
+    NB_AckM,        desc="NB Ack to TCC Request";
+    NB_AckCtoD,     desc="NB Ack to TCC Request";
+    NB_AckWB,       desc="NB Ack for clean WB";
+
+
+    // Incoming Probes from NB
+    PrbInvData,         desc="Invalidating probe, return dirty data";
+    PrbInv,             desc="Invalidating probe, no need to return data";
+    PrbShrData,         desc="Downgrading probe, return data";
+  }
+
+
+  // TYPES
+
+  // Entry for directory
+  structure(Entry, desc="...", interface='AbstractCacheEntry') {
+    State CacheState,          desc="Cache state (Cache of directory entries)";
+    DataBlock DataBlk,             desc="data for the block";
+    NetDest Sharers,                   desc="Sharers for this block";
+    NetDest Owner,                     desc="Owner of this block";
+    NetDest MergedSharers,             desc="Read sharers who are merged on a request";
+    int WaitingUnblocks,           desc="Number of acks we're waiting for";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,    desc="Transient state";
+    DataBlock DataBlk, desc="DataBlk";
+    bool Dirty,        desc="Is the data dirty?";
+    MachineID Requestor, desc="requestor";
+    int NumPendingAcks,        desc="num acks expected";
+    MachineID OriginalRequestor,        desc="Original Requestor";
+    MachineID UntransferredOwner,    desc = "Untransferred owner for an upgrade transaction";
+    bool UntransferredOwnerExists,    desc = "1 if Untransferred owner exists for an upgrade transaction";
+    bool Cached,        desc="data hit in Cache";
+    bool Shared,	desc="victim hit by shared probe";
+    bool Upgrade,	desc="An upgrade request in progress";
+    bool CtoD,	desc="Saved sysack info";
+    CoherenceState CohState, desc="Saved sysack info";
+    MessageSizeType MessageSize, desc="Saved sysack info";
+    MachineID Sender, desc="sender";
+  }
+
+  structure(TBETable, external = "yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  // ** OBJECTS **
+  TBETable TBEs, template="<TCCdir_TBE>", constructor="m_number_of_TBEs";
+  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+  NetDest TCC_dir_subtree;
+  NetDest temp;
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+
+
+  bool presentOrAvail(Addr addr) {
+    return directory.isTagPresent(addr) || directory.cacheAvail(addr);
+  }
+
+  Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", directory.lookup(addr));
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return tbe.DataBlk;
+    } else {
+      assert(false);
+      return getCacheEntry(addr).DataBlk;
+    }
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if(is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+ void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(TCCdir_State_to_permission(state));
+    }
+  }
+
+ AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return TCCdir_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return TCCdir_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+      cache_entry.CacheState := state;
+
+      if (state == State:S) {
+        assert(cache_entry.Owner.count() == 0);
+      }
+
+      if (state == State:O) {
+        assert(cache_entry.Owner.count() == 1);
+        assert(cache_entry.Sharers.isSuperset(cache_entry.Owner) == false);
+      }
+
+      if (state == State:M) {
+        assert(cache_entry.Owner.count() == 1);
+        assert(cache_entry.Sharers.count() == 0);
+      }
+
+      if (state == State:E) {
+        assert(cache_entry.Owner.count() == 0);
+        assert(cache_entry.Sharers.count() == 1);
+      }
+    }
+  }
+
+
+
+ void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+        directory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+        directory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+        directory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+        directory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      return directory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      return directory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      return directory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return directory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+  // ** OUT_PORTS **
+
+  // Three classes of ports
+  // Class 1: downward facing network links to NB
+  out_port(requestToNB_out, CPURequestMsg, requestToNB);
+  out_port(responseToNB_out, ResponseMsg, responseToNB);
+  out_port(unblockToNB_out, UnblockMsg, unblockToNB);
+
+
+  // Class 2: upward facing ports to GPU cores
+  out_port(probeToCore_out, TDProbeRequestMsg, probeToCore);
+  out_port(responseToCore_out, ResponseMsg, responseToCore);
+
+  // Class 3: sideward facing ports (on "wirebuffer" links) to TCC
+  out_port(w_requestTCC_out, CPURequestMsg, w_reqToTCC);
+  out_port(w_probeTCC_out, NBProbeRequestMsg, w_probeToTCC);
+  out_port(w_respTCC_out, ResponseMsg, w_respToTCC);
+
+
+  // local trigger port
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+
+  //
+  // request queue going to NB
+  //
+
+  // ** IN_PORTS **
+
+  // Trigger Queue
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=8) {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        assert(is_valid(tbe));
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if ((in_msg.Type == TriggerType:AcksComplete) && (tbe.Upgrade == false))  {
+          trigger(Event:ProbeAcksComplete, in_msg.addr, cache_entry, tbe);
+        } else if ((in_msg.Type == TriggerType:AcksComplete) && (tbe.Upgrade == true))  {
+          trigger(Event:ProbeAcksCompleteReissue, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+  // Unblock Networks (TCCdir can receive unblocks from TCC, TCPs)
+  // Port on first (of three) wire buffers from TCC
+  in_port(w_TCCUnblock_in, UnblockMsg, w_TCCUnblockToTCCDir, rank=7) {
+    if (w_TCCUnblock_in.isReady(clockEdge())) {
+      peek(w_TCCUnblock_in, UnblockMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.currentOwner) {
+            trigger(Event:TCCUnblock, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.valid) {
+            trigger(Event:TCCUnblock_Sharer, in_msg.addr, cache_entry, tbe);
+        } else {
+            trigger(Event:TCCUnblock_NotValid, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+  in_port(unblockNetwork_in, UnblockMsg, unblockFromTCP, rank=6) {
+    if (unblockNetwork_in.isReady(clockEdge())) {
+      peek(unblockNetwork_in, UnblockMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if(cache_entry.WaitingUnblocks == 1) {
+          trigger(Event:LastCoreUnblock, in_msg.addr, cache_entry, tbe);
+        }
+        else {
+          trigger(Event:CoreUnblock, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+
+  //Responses from TCC, and Cores
+  // Port on second (of three) wire buffers from TCC
+  in_port(w_TCCResponse_in, ResponseMsg, w_respToTCCDir, rank=5) {
+    if (w_TCCResponse_in.isReady(clockEdge())) {
+      peek(w_TCCResponse_in, ResponseMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+          trigger(Event:TCCPrbResp, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+  in_port(responseNetwork_in, ResponseMsg, responseFromTCP, rank=4) {
+    if (responseNetwork_in.isReady(clockEdge())) {
+      peek(responseNetwork_in, ResponseMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+          trigger(Event:CPUPrbResp, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+
+  // Port on third (of three) wire buffers from TCC
+  in_port(w_TCCRequest_in, CPURequestMsg, w_reqToTCCDir, rank=3) {
+      if(w_TCCRequest_in.isReady(clockEdge())) {
+          peek(w_TCCRequest_in, CPURequestMsg) {
+              TBE tbe := TBEs.lookup(in_msg.addr);
+              Entry cache_entry := getCacheEntry(in_msg.addr);
+              if (in_msg.Type == CoherenceRequestType:WrCancel) {
+                  trigger(Event:CancelWB, in_msg.addr, cache_entry, tbe);
+              } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+                  if (is_valid(cache_entry) && cache_entry.Owner.isElement(in_msg.Requestor)) {
+                      // if modified, or owner with no other sharers
+                      if ((cache_entry.CacheState == State:M) || (cache_entry.Sharers.count() == 0)) {
+                          assert(cache_entry.Owner.count()==1);
+                          trigger(Event:VicDirtyLast, in_msg.addr, cache_entry, tbe);
+                      } else {
+                          trigger(Event:VicDirty, in_msg.addr, cache_entry, tbe);
+                      }
+                  } else {
+                      trigger(Event:StaleVic, in_msg.addr, cache_entry, tbe);
+                  }
+              } else {
+                  if (in_msg.Type == CoherenceRequestType:VicClean) {
+                      if (is_valid(cache_entry) && cache_entry.Sharers.isElement(in_msg.Requestor)) {
+                          if (cache_entry.Sharers.count() == 1) {
+                              // Last copy, victimize to L3
+                              trigger(Event:VicClean, in_msg.addr, cache_entry, tbe);
+                          } else {
+                              // Either not the last copy or stall. No need to victimmize
+                              // remove sharer from sharer list
+                              assert(cache_entry.Sharers.count() > 1);
+                              trigger(Event:NoVic, in_msg.addr, cache_entry, tbe);
+                          }
+                      } else {
+                          trigger(Event:StaleVic, in_msg.addr, cache_entry, tbe);
+                      }
+                  }
+              }
+          }
+      }
+    }
+
+  in_port(responseFromNB_in, ResponseMsg, responseFromNB, rank=2) {
+    if (responseFromNB_in.isReady(clockEdge())) {
+      peek(responseFromNB_in, ResponseMsg, block_on="addr") {
+
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+          if (in_msg.State == CoherenceState:Modified) {
+            if (in_msg.CtoD) {
+              trigger(Event:NB_AckCtoD, in_msg.addr, cache_entry, tbe);
+            } else {
+              trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe);
+            }
+          } else if (in_msg.State == CoherenceState:Shared) {
+            trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.State == CoherenceState:Exclusive) {
+            trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+          trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+
+  // Finally handling incoming requests (from TCP) and probes (from NB).
+
+  in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB, rank=1) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, NBProbeRequestMsg) {
+        DPRINTF(RubySlicc, "%s\n", in_msg);
+        DPRINTF(RubySlicc, "machineID: %s\n", machineID);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == ProbeRequestType:PrbInv) {
+          if (in_msg.ReturnData) {
+            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+          assert(in_msg.ReturnData);
+          trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+
+  in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) {
+    if (coreRequestNetwork_in.isReady(clockEdge())) {
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (presentOrAvail(in_msg.addr)) {
+          if (in_msg.Type == CoherenceRequestType:VicDirty) {
+            trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+              if (is_valid(cache_entry) && cache_entry.Owner.isElement(in_msg.Requestor)) {
+                  trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+              } else if(is_valid(cache_entry) && (cache_entry.Sharers.count() + cache_entry.Owner.count() ) >1) {
+                  trigger(Event:NoCPUWrite, in_msg.addr, cache_entry, tbe);
+              } else {
+                  trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+              }
+          } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
+            trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+            trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+            trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:WrCancel) {
+            trigger(Event:CPUWriteCancel, in_msg.addr, cache_entry, tbe);
+          }
+        } else {
+          // All requests require a directory entry
+          Addr victim := directory.cacheProbe(in_msg.addr);
+          trigger(Event:Recall, victim, getCacheEntry(victim), TBEs.lookup(victim));
+        }
+      }
+    }
+  }
+
+
+
+
+  // Actions
+
+  //Downward facing actions
+
+  action(c_clearOwner, "c", desc="Clear the owner field") {
+    cache_entry.Owner.clear();
+  }
+
+  action(rS_removeRequesterFromSharers, "rS", desc="Remove unblocker from sharer list") {
+    peek(unblockNetwork_in, UnblockMsg) {
+      cache_entry.Sharers.remove(in_msg.Sender);
+    }
+  }
+
+  action(rT_removeTCCFromSharers, "rT", desc="Remove  TCC from sharer list") {
+    peek(w_TCCRequest_in, CPURequestMsg) {
+      cache_entry.Sharers.remove(in_msg.Requestor);
+    }
+  }
+
+  action(rO_removeOriginalRequestorFromSharers, "rO", desc="Remove replacing core from sharer list") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      cache_entry.Sharers.remove(in_msg.Requestor);
+    }
+  }
+
+  action(rC_removeCoreFromSharers, "rC", desc="Remove replacing core from sharer list") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      cache_entry.Sharers.remove(in_msg.Requestor);
+    }
+  }
+
+  action(rCo_removeCoreFromOwner, "rCo", desc="Remove replacing core from sharer list") {
+    // Note that under some cases this action will try to remove a stale owner
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      cache_entry.Owner.remove(in_msg.Requestor);
+    }
+  }
+
+  action(rR_removeResponderFromSharers, "rR", desc="Remove responder from sharer list") {
+    peek(responseNetwork_in, ResponseMsg) {
+      cache_entry.Sharers.remove(in_msg.Sender);
+    }
+  }
+
+  action(nC_sendNullWBAckToCore, "nC", desc = "send a null WB Ack to release core") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(responseToCore_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysWBNack;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.MessageSize := in_msg.MessageSize;
+      }
+   }
+  }
+
+  action(nT_sendNullWBAckToTCC, "nT", desc = "send a null WB Ack to release TCC") {
+    peek(w_TCCRequest_in, CPURequestMsg) {
+      enqueue(w_respTCC_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysWBAck;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.MessageSize := in_msg.MessageSize;
+      }
+    }
+  }
+
+  action(eto_moveExSharerToOwner, "eto", desc="move the current exclusive sharer to owner") {
+      assert(cache_entry.Sharers.count() == 1);
+      assert(cache_entry.Owner.count() == 0);
+      cache_entry.Owner := cache_entry.Sharers;
+      cache_entry.Sharers.clear();
+      APPEND_TRANSITION_COMMENT(" new owner ");
+      APPEND_TRANSITION_COMMENT(cache_entry.Owner);
+  }
+
+  action(aT_addTCCToSharers, "aT", desc="Add TCC to sharer list") {
+    peek(w_TCCUnblock_in, UnblockMsg) {
+      cache_entry.Sharers.add(in_msg.Sender);
+    }
+  }
+
+  action(as_addToSharers, "as", desc="Add unblocker to sharer list") {
+    peek(unblockNetwork_in, UnblockMsg) {
+      cache_entry.Sharers.add(in_msg.Sender);
+    }
+  }
+
+  action(c_moveOwnerToSharer, "cc", desc="Move owner to sharers") {
+    cache_entry.Sharers.addNetDest(cache_entry.Owner);
+    cache_entry.Owner.clear();
+  }
+
+  action(cc_clearSharers, "\c", desc="Clear the sharers field") {
+    cache_entry.Sharers.clear();
+  }
+
+  action(e_ownerIsUnblocker, "e", desc="The owner is now the unblocker") {
+    peek(unblockNetwork_in, UnblockMsg) {
+      cache_entry.Owner.clear();
+      cache_entry.Owner.add(in_msg.Sender);
+      APPEND_TRANSITION_COMMENT(" tcp_ub owner ");
+      APPEND_TRANSITION_COMMENT(cache_entry.Owner);
+    }
+  }
+
+  action(eT_ownerIsUnblocker, "eT", desc="TCC (unblocker) is now owner") {
+    peek(w_TCCUnblock_in, UnblockMsg) {
+      cache_entry.Owner.clear();
+      cache_entry.Owner.add(in_msg.Sender);
+      APPEND_TRANSITION_COMMENT(" tcc_ub owner ");
+      APPEND_TRANSITION_COMMENT(cache_entry.Owner);
+    }
+  }
+
+  action(ctr_copyTCCResponseToTBE, "ctr", desc="Copy TCC probe response data to TBE") {
+    peek(w_TCCResponse_in, ResponseMsg) {
+      // Overwrite data if tbe does not hold dirty data. Stop once it is dirty.
+      if(tbe.Dirty == false) {
+        tbe.DataBlk := in_msg.DataBlk;
+        tbe.Dirty := in_msg.Dirty;
+        tbe.Sender := in_msg.Sender;
+      }
+      DPRINTF(RubySlicc, "%s\n", (tbe.DataBlk));
+    }
+  }
+
+  action(ccr_copyCoreResponseToTBE, "ccr", desc="Copy core probe response data to TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      // Overwrite data if tbe does not hold dirty data. Stop once it is dirty.
+      if(tbe.Dirty == false) {
+          tbe.DataBlk := in_msg.DataBlk;
+          tbe.Dirty := in_msg.Dirty;
+
+          if(tbe.Sender == machineID) {
+              tbe.Sender := in_msg.Sender;
+          }
+      }
+      DPRINTF(RubySlicc, "%s\n", (tbe.DataBlk));
+    }
+  }
+
+  action(cd_clearDirtyBitTBE, "cd", desc="Clear Dirty bit in TBE") {
+      tbe.Dirty := false;
+  }
+
+  action(n_issueRdBlk, "n-", desc="Issue RdBlk") {
+    enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlk;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+    }
+  }
+
+  action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+    enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkS;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+    }
+  }
+
+  action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
+    enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkM;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+    }
+  }
+
+  action(rU_rememberUpgrade, "rU", desc="Remember that this was an upgrade") {
+      tbe.Upgrade := true;
+  }
+
+  action(ruo_rememberUntransferredOwner, "ruo", desc="Remember the untransferred owner") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if(in_msg.UntransferredOwner == true) {
+        tbe.UntransferredOwner := in_msg.Sender;
+        tbe.UntransferredOwnerExists := true;
+      }
+      DPRINTF(RubySlicc, "%s\n", (in_msg));
+    }
+  }
+
+  action(ruoT_rememberUntransferredOwnerTCC, "ruoT", desc="Remember the untransferred owner") {
+    peek(w_TCCResponse_in, ResponseMsg) {
+      if(in_msg.UntransferredOwner == true) {
+        tbe.UntransferredOwner := in_msg.Sender;
+        tbe.UntransferredOwnerExists := true;
+      }
+      DPRINTF(RubySlicc, "%s\n", (in_msg));
+    }
+  }
+
+ action(vd_victim, "vd", desc="Victimize M/O Data") {
+   enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+     out_msg.addr := address;
+     out_msg.Requestor := machineID;
+     out_msg.Destination.add(map_Address_to_Directory(address));
+     out_msg.MessageSize := MessageSizeType:Request_Control;
+     out_msg.Type := CoherenceRequestType:VicDirty;
+     if (cache_entry.CacheState == State:O) {
+       out_msg.Shared := true;
+     } else {
+       out_msg.Shared := false;
+     }
+     out_msg.Dirty := true;
+   }
+ }
+
+  action(vc_victim, "vc", desc="Victimize E/S Data") {
+    enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicClean;
+      if (cache_entry.CacheState == State:S) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+      out_msg.Dirty := false;
+    }
+  }
+
+
+  action(sT_sendRequestToTCC, "sT", desc="send request to TCC") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(w_requestTCC_out, CPURequestMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := in_msg.Type;
+        out_msg.Requestor := in_msg.Requestor;
+        out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                TCC_select_low_bit, TCC_select_num_bits));
+        out_msg.Shared := in_msg.Shared;
+        out_msg.MessageSize := in_msg.MessageSize;
+      }
+      APPEND_TRANSITION_COMMENT(" requestor ");
+      APPEND_TRANSITION_COMMENT(in_msg.Requestor);
+
+    }
+  }
+
+
+  action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") {
+    MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+                                       TCC_select_low_bit, TCC_select_num_bits);
+
+    temp := cache_entry.Sharers;
+    temp.addNetDest(cache_entry.Owner);
+    if (temp.isElement(tcc)) {
+        temp.remove(tcc);
+    }
+    if (temp.count() > 0) {
+      enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbDowngrade;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination := temp;
+        tbe.NumPendingAcks := temp.count();
+        if(cache_entry.CacheState == State:M) {
+            assert(tbe.NumPendingAcks == 1);
+        }
+        DPRINTF(RubySlicc, "%s\n", (out_msg));
+      }
+    }
+  }
+
+  action(ls2_probeShrL2Data, "ls2", desc="local probe downgrade L2, return data") {
+    MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+                                       TCC_select_low_bit, TCC_select_num_bits);
+    if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
+      enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+          out_msg.addr := address;
+          out_msg.Type := ProbeRequestType:PrbDowngrade;
+          out_msg.ReturnData := true;
+          out_msg.MessageSize := MessageSizeType:Control;
+          out_msg.Destination.add(tcc);
+          tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+
+      }
+    }
+  }
+
+  action(s2_probeShrL2Data, "s2", desc="probe shared L2, return data") {
+    MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+                                       TCC_select_low_bit, TCC_select_num_bits);
+    if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
+      enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+          out_msg.addr := address;
+          out_msg.Type := ProbeRequestType:PrbDowngrade;
+          out_msg.ReturnData := true;
+          out_msg.MessageSize := MessageSizeType:Control;
+          out_msg.Destination.add(tcc);
+          tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+
+      }
+    }
+  }
+
+  action(ldc_probeInvCoreData, "ldc", desc="local probe  to inv cores, return data") {
+    MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+                                       TCC_select_low_bit, TCC_select_num_bits);
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+        NetDest dest:= cache_entry.Sharers;
+        dest.addNetDest(cache_entry.Owner);
+        if(dest.isElement(tcc)){
+         dest.remove(tcc);
+        }
+        dest.remove(in_msg.Requestor);
+        tbe.NumPendingAcks := dest.count();
+        if (dest.count()>0){
+        enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbInv;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+
+        out_msg.Destination.addNetDest(dest);
+        if(cache_entry.CacheState == State:M) {
+		assert(tbe.NumPendingAcks == 1);
+        }
+
+        DPRINTF(RubySlicc, "%s\n", (out_msg));
+       }
+      }
+    }
+  }
+
+  action(ld2_probeInvL2Data, "ld2", desc="local probe inv L2, return data") {
+    MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+                                       TCC_select_low_bit, TCC_select_num_bits);
+    if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
+      enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+          out_msg.addr := address;
+          out_msg.Type := ProbeRequestType:PrbInv;
+          out_msg.ReturnData := true;
+          out_msg.MessageSize := MessageSizeType:Control;
+          out_msg.Destination.add(tcc);
+          tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+
+      }
+    }
+  }
+
+  action(dc_probeInvCoreData, "dc", desc="probe  inv cores + TCC, return data") {
+    MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+                                       TCC_select_low_bit, TCC_select_num_bits);
+    enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := ProbeRequestType:PrbInv;
+      out_msg.ReturnData := true;
+      out_msg.MessageSize := MessageSizeType:Control;
+
+      out_msg.Destination.addNetDest(cache_entry.Sharers);
+      out_msg.Destination.addNetDest(cache_entry.Owner);
+      tbe.NumPendingAcks := cache_entry.Sharers.count() + cache_entry.Owner.count();
+      if(cache_entry.CacheState == State:M) {
+	  assert(tbe.NumPendingAcks == 1);
+      }
+      if (out_msg.Destination.isElement(tcc)) {
+          out_msg.Destination.remove(tcc);
+          tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+      }
+
+      DPRINTF(RubySlicc, "%s\n", (out_msg));
+    }
+  }
+
+  action(d2_probeInvL2Data, "d2", desc="probe inv L2, return data") {
+    MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+                                       TCC_select_low_bit, TCC_select_num_bits);
+    if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
+      enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+          out_msg.addr := address;
+          out_msg.Type := ProbeRequestType:PrbInv;
+          out_msg.ReturnData := true;
+          out_msg.MessageSize := MessageSizeType:Control;
+          out_msg.Destination.add(tcc);
+          tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+
+      }
+    }
+  }
+
+  action(lpc_probeInvCore, "lpc", desc="local probe inv cores, no data") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      TCC_dir_subtree.broadcast(MachineType:TCP);
+      TCC_dir_subtree.broadcast(MachineType:SQC);
+
+      temp := cache_entry.Sharers;
+      temp := temp.OR(cache_entry.Owner);
+      TCC_dir_subtree := TCC_dir_subtree.AND(temp);
+      tbe.NumPendingAcks := TCC_dir_subtree.count();
+      if(cache_entry.CacheState == State:M) {
+	   assert(tbe.NumPendingAcks == 1);
+      }
+      if(TCC_dir_subtree.isElement(in_msg.Requestor)) {
+         TCC_dir_subtree.remove(in_msg.Requestor);
+         tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+      }
+
+      if(TCC_dir_subtree.count() > 0) {
+         enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+           out_msg.addr := address;
+           out_msg.Type := ProbeRequestType:PrbInv;
+           out_msg.ReturnData := false;
+           out_msg.MessageSize := MessageSizeType:Control;
+           out_msg.localCtoD := true;
+
+           out_msg.Destination.addNetDest(TCC_dir_subtree);
+
+           DPRINTF(RubySlicc, "%s\n", (out_msg));
+         }
+       }
+    }
+  }
+
+  action(ipc_probeInvCore, "ipc", desc="probe inv cores, no data") {
+    TCC_dir_subtree.broadcast(MachineType:TCP);
+    TCC_dir_subtree.broadcast(MachineType:SQC);
+
+    temp := cache_entry.Sharers;
+    temp := temp.OR(cache_entry.Owner);
+    TCC_dir_subtree := TCC_dir_subtree.AND(temp);
+    tbe.NumPendingAcks := TCC_dir_subtree.count();
+    if(TCC_dir_subtree.count() > 0) {
+
+      enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbInv;
+        out_msg.ReturnData := false;
+        out_msg.MessageSize := MessageSizeType:Control;
+
+        out_msg.Destination.addNetDest(TCC_dir_subtree);
+        if(cache_entry.CacheState == State:M) {
+          assert(tbe.NumPendingAcks == 1);
+        }
+
+        DPRINTF(RubySlicc, "%s\n", (out_msg));
+      }
+    }
+  }
+
+  action(i2_probeInvL2, "i2", desc="probe inv L2, no data") {
+    MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+                                       TCC_select_low_bit, TCC_select_num_bits);
+    if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
+      enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+          tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+          out_msg.addr := address;
+          out_msg.Type := ProbeRequestType:PrbInv;
+          out_msg.ReturnData := false;
+          out_msg.MessageSize := MessageSizeType:Control;
+          out_msg.Destination.add(tcc);
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+
+      }
+    }
+  }
+
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC, L3  respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+    enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and TCC respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Dirty := false;
+      out_msg.Ntsl := true;
+      out_msg.Hit := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
+    enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and TCC respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Dirty := false;  // only true if sending back data i think
+      out_msg.Hit := false;
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+
+
+  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+    enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+      assert(is_valid(cache_entry) || is_valid(tbe));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.DataBlk := getDataBlock(address);
+      if (is_valid(tbe)) {
+        out_msg.Dirty := tbe.Dirty;
+      }
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+
+  action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
+    enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+      assert(is_valid(cache_entry) || is_valid(tbe));
+      assert(is_valid(cache_entry));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.DataBlk := getDataBlock(address);
+      if (is_valid(tbe)) {
+        out_msg.Dirty := tbe.Dirty;
+      }
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  action(mc_cancelWB, "mc", desc="send writeback cancel to NB directory") {
+    enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:WrCancel;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Requestor := machineID;
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+    }
+  }
+
+ action(sCS_sendCollectiveResponseS, "sCS", desc="send shared response to all merged TCP/SQC") {
+      enqueue(responseToCore_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.Sender := tbe.Sender;
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.CtoD := false;
+        out_msg.State := CoherenceState:Shared;
+        out_msg.Destination.addNetDest(cache_entry.MergedSharers);
+        out_msg.Shared := tbe.Shared;
+        out_msg.Dirty := tbe.Dirty;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+  }
+
+ action(sS_sendResponseS, "sS", desc="send shared response to TCP/SQC") {
+      enqueue(responseToCore_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.Sender := tbe.Sender;
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.CtoD := false;
+        out_msg.State := CoherenceState:Shared;
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.Shared := tbe.Shared;
+        out_msg.Dirty := tbe.Dirty;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+  }
+
+ action(sM_sendResponseM, "sM", desc="send response to TCP/SQC") {
+      enqueue(responseToCore_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.Sender := tbe.Sender;
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.CtoD := false;
+        out_msg.State := CoherenceState:Modified;
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.Shared := tbe.Shared;
+        out_msg.Dirty := tbe.Dirty;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+  }
+
+
+
+ action(fw2_forwardWBAck, "fw2", desc="forward WBAck to TCC") {
+    peek(responseFromNB_in, ResponseMsg) {
+      if(tbe.OriginalRequestor != machineID) {
+        enqueue(w_respTCC_out, ResponseMsg, 1) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:TDSysWBAck;
+          out_msg.Sender := machineID;
+          //out_msg.DataBlk := tbe.DataBlk;
+          out_msg.Destination.add(tbe.OriginalRequestor);
+          out_msg.MessageSize := in_msg.MessageSize;
+        }
+      }
+    }
+  }
+
+ action(sa_saveSysAck, "sa", desc="Save SysAck ") {
+    peek(responseFromNB_in, ResponseMsg) {
+        tbe.Dirty := in_msg.Dirty;
+        if (tbe.Dirty == false) {
+           tbe.DataBlk := in_msg.DataBlk;
+        }
+        else {
+           tbe.DataBlk := tbe.DataBlk;
+        }
+        tbe.CtoD := in_msg.CtoD;
+        tbe.CohState := in_msg.State;
+        tbe.Shared := in_msg.Shared;
+        tbe.MessageSize := in_msg.MessageSize;
+    }
+  }
+
+ action(fsa_forwardSavedAck, "fsa", desc="forward saved SysAck to TCP or SQC") {
+      enqueue(responseToCore_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.Sender := machineID;
+        if (tbe.Dirty == false) {
+           out_msg.DataBlk := tbe.DataBlk;
+        }
+        else {
+           out_msg.DataBlk := tbe.DataBlk;
+        }
+        out_msg.CtoD := tbe.CtoD;
+        out_msg.State := tbe.CohState;
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.Shared := tbe.Shared;
+        out_msg.MessageSize := tbe.MessageSize;
+        out_msg.Dirty := tbe.Dirty;
+        out_msg.Sender := tbe.Sender;
+      }
+  }
+
+ action(fa_forwardSysAck, "fa", desc="forward SysAck to TCP or SQC") {
+    peek(responseFromNB_in, ResponseMsg) {
+      enqueue(responseToCore_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.Sender := machineID;
+        if (tbe.Dirty == false) {
+           out_msg.DataBlk := in_msg.DataBlk;
+           tbe.Sender := machineID;
+        }
+        else {
+           out_msg.DataBlk := tbe.DataBlk;
+        }
+        out_msg.CtoD := in_msg.CtoD;
+        out_msg.State := in_msg.State;
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.Shared := in_msg.Shared;
+        out_msg.MessageSize := in_msg.MessageSize;
+        out_msg.Dirty := in_msg.Dirty;
+        out_msg.Sender := tbe.Sender;
+        DPRINTF(RubySlicc, "%s\n", (out_msg.DataBlk));
+      }
+    }
+  }
+
+ action(pso_probeSharedDataOwner, "pso", desc="probe shared data at owner") {
+    MachineID tcc := mapAddressToRange(address,MachineType:TCC,
+                                       TCC_select_low_bit, TCC_select_num_bits);
+    if (cache_entry.Owner.isElement(tcc)) {
+      enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbDowngrade;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination.add(tcc);
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+    else { // i.e., owner is a core
+      enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbDowngrade;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination.addNetDest(cache_entry.Owner);
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+    tbe.NumPendingAcks := 1;
+  }
+
+  action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") {
+    coreRequestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(j_popIncomingUnblockQueue, "j", desc="Pop incoming unblock queue") {
+    unblockNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pk_popResponseQueue, "pk", desc="Pop response queue") {
+    responseNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="Pop incoming probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pR_popResponseFromNBQueue, "pR", desc="Pop incoming Response queue From NB") {
+    responseFromNB_in.dequeue(clockEdge());
+  }
+
+  action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(pl_popTCCRequestQueue, "pl", desc="pop TCC request queue") {
+    w_TCCRequest_in.dequeue(clockEdge());
+  }
+
+  action(plr_popTCCResponseQueue, "plr", desc="pop TCC response queue") {
+    w_TCCResponse_in.dequeue(clockEdge());
+  }
+
+  action(plu_popTCCUnblockQueue, "plu", desc="pop TCC unblock queue") {
+    w_TCCUnblock_in.dequeue(clockEdge());
+  }
+
+
+  action(m_addUnlockerToSharers, "m", desc="Add the unlocker to the sharer list") {
+    peek(unblockNetwork_in, UnblockMsg) {
+      cache_entry.Sharers.add(in_msg.Sender);
+      cache_entry.MergedSharers.remove(in_msg.Sender);
+      assert(cache_entry.WaitingUnblocks >= 0);
+      cache_entry.WaitingUnblocks := cache_entry.WaitingUnblocks - 1;
+    }
+  }
+
+  action(q_addOutstandingMergedSharer, "q", desc="Increment outstanding requests") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      cache_entry.MergedSharers.add(in_msg.Requestor);
+      cache_entry.WaitingUnblocks := cache_entry.WaitingUnblocks + 1;
+    }
+  }
+
+  action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+    enqueue(unblockToNB_out, UnblockMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(zz_recycleRequest, "\z", desc="Recycle the request queue") {
+    coreRequestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(yy_recycleTCCRequestQueue, "yy", desc="recycle yy request queue") {
+    w_TCCRequest_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(xz_recycleResponseQueue, "xz", desc="recycle response queue") {
+    responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(xx_recycleTCCResponseQueue, "xx", desc="recycle TCC response queue") {
+    w_TCCResponse_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(vv_recycleTCCUnblockQueue, "vv", desc="Recycle the probe request queue") {
+    w_TCCUnblock_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(xy_recycleUnblockQueue, "xy", desc="Recycle the probe request queue") {
+    w_TCCUnblock_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(ww_recycleProbeRequest, "ww", desc="Recycle the probe request queue") {
+    probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(x_decrementAcks, "x", desc="decrement Acks pending") {
+    tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+  }
+
+  action(o_checkForAckCompletion, "o", desc="check for ack completion") {
+    if (tbe.NumPendingAcks == 0) {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:AcksComplete;
+      }
+    }
+    APPEND_TRANSITION_COMMENT(" tbe acks ");
+    APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+  }
+
+  action(tp_allocateTBE, "tp", desc="allocate TBE Entry for upward transactions") {
+    check_allocate(TBEs);
+    peek(probeNetwork_in, NBProbeRequestMsg) {
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      tbe.Dirty := false;
+      tbe.NumPendingAcks := 0;
+      tbe.UntransferredOwnerExists := false;
+    }
+  }
+
+  action(tv_allocateTBE, "tv", desc="allocate TBE Entry for TCC transactions") {
+      check_allocate(TBEs);
+    peek(w_TCCRequest_in, CPURequestMsg) {
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      tbe.DataBlk := in_msg.DataBlk; // Data only for WBs
+      tbe.Dirty := false;
+      tbe.OriginalRequestor := in_msg.Requestor;
+      tbe.NumPendingAcks := 0;
+      tbe.UntransferredOwnerExists := false;
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+      check_allocate(TBEs);//check whether resources are full
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
+      tbe.Dirty := false;
+      tbe.Upgrade := false;
+      tbe.OriginalRequestor := in_msg.Requestor;
+      tbe.NumPendingAcks := 0;
+      tbe.UntransferredOwnerExists := false;
+      tbe.Sender := machineID;
+    }
+  }
+
+  action(tr_allocateTBE, "tr", desc="allocate TBE Entry for recall") {
+      check_allocate(TBEs);//check whether resources are full
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
+      tbe.Dirty := false;
+      tbe.Upgrade := false;
+      tbe.OriginalRequestor := machineID; //Recall request, Self initiated
+      tbe.NumPendingAcks := 0;
+      tbe.UntransferredOwnerExists := false;
+  }
+
+  action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+
+  action(d_allocateDir, "d", desc="allocate Directory Cache") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(directory.allocate(address, new Entry));
+    }
+  }
+
+  action(dd_deallocateDir, "dd", desc="deallocate Directory Cache") {
+    if (is_valid(cache_entry)) {
+        directory.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+     enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+         out_msg.addr := address;
+         out_msg.Type := CoherenceResponseType:StaleNotif;
+         out_msg.Destination.add(map_Address_to_Directory(address));
+         out_msg.Sender := machineID;
+         out_msg.MessageSize := MessageSizeType:Response_Control;
+     }
+  }
+
+  action(wb_data, "wb", desc="write back data") {
+    enqueue(responseToNB_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUData;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.Dirty := tbe.Dirty;
+      if (tbe.Shared) {
+        out_msg.NbReqShared := true;
+      } else {
+        out_msg.NbReqShared := false;
+      }
+      out_msg.State := CoherenceState:Shared; // faux info
+      out_msg.MessageSize := MessageSizeType:Writeback_Data;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
+    assert(is_valid(tbe));
+    tbe.Shared := true;
+  }
+
+  action(y_writeDataToTBE, "y", desc="write Probe Data to TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (!tbe.Dirty || in_msg.Dirty) {
+        tbe.DataBlk := in_msg.DataBlk;
+        tbe.Dirty := in_msg.Dirty;
+      }
+      if (in_msg.Hit) {
+        tbe.Cached := true;
+      }
+    }
+  }
+
+  action(ty_writeTCCDataToTBE, "ty", desc="write TCC Probe Data to TBE") {
+    peek(w_TCCResponse_in, ResponseMsg) {
+      if (!tbe.Dirty || in_msg.Dirty) {
+        tbe.DataBlk := in_msg.DataBlk;
+        tbe.Dirty := in_msg.Dirty;
+      }
+      if (in_msg.Hit) {
+        tbe.Cached := true;
+      }
+    }
+  }
+
+
+  action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+    directory.setMRU(address);
+  }
+
+  // TRANSITIONS
+
+  // Handling TCP/SQC requests (similar to how NB dir handles TCC events with some changes to account for stateful directory).
+
+
+  // transitions from base
+  transition(I, RdBlk, I_ES){TagArrayRead} {
+    d_allocateDir;
+    t_allocateTBE;
+    n_issueRdBlk;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(I, RdBlkS, I_S){TagArrayRead} {
+    d_allocateDir;
+    t_allocateTBE;
+    nS_issueRdBlkS;
+    i_popIncomingRequestQueue;
+  }
+
+
+  transition(I_S, NB_AckS, BBB_S) {
+    fa_forwardSysAck;
+    pR_popResponseFromNBQueue;
+  }
+
+  transition(I_ES, NB_AckS, BBB_S) {
+    fa_forwardSysAck;
+    pR_popResponseFromNBQueue;
+  }
+
+ transition(I_ES, NB_AckE, BBB_E) {
+    fa_forwardSysAck;
+    pR_popResponseFromNBQueue;
+  }
+
+  transition({S_M, O_M}, {NB_AckCtoD,NB_AckM}, BBB_M) {
+    fa_forwardSysAck;
+    pR_popResponseFromNBQueue;
+  }
+
+  transition(I_M, NB_AckM, BBB_M) {
+    fa_forwardSysAck;
+    pR_popResponseFromNBQueue;
+  }
+
+  transition(BBB_M, CoreUnblock, M){TagArrayWrite} {
+    c_clearOwner;
+    cc_clearSharers;
+    e_ownerIsUnblocker;
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(BBB_S, CoreUnblock, S){TagArrayWrite}  {
+    as_addToSharers;
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(BBB_E, CoreUnblock, E){TagArrayWrite}  {
+    as_addToSharers;
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    j_popIncomingUnblockQueue;
+  }
+
+
+  transition(I, RdBlkM, I_M){TagArrayRead}  {
+    d_allocateDir;
+    t_allocateTBE;
+    nM_issueRdBlkM;
+    i_popIncomingRequestQueue;
+  }
+
+  //
+  transition(S, {RdBlk, RdBlkS}, BBS_S){TagArrayRead} {
+    t_allocateTBE;
+    sc_probeShrCoreData;
+    s2_probeShrL2Data;
+    q_addOutstandingMergedSharer;
+    i_popIncomingRequestQueue;
+  }
+  // Merging of read sharing into a single request
+  transition(BBS_S, {RdBlk, RdBlkS}) {
+    q_addOutstandingMergedSharer;
+    i_popIncomingRequestQueue;
+  }
+  // Wait for probe acks to be complete
+  transition(BBS_S, CPUPrbResp) {
+    ccr_copyCoreResponseToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+
+  transition(BBS_S, TCCPrbResp) {
+    ctr_copyTCCResponseToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+
+  // Window for merging complete with this transition
+  // Send responses to all outstanding
+  transition(BBS_S, ProbeAcksComplete, BB_S) {
+    sCS_sendCollectiveResponseS;
+    pt_popTriggerQueue;
+  }
+
+  transition(BB_S, CoreUnblock, BB_S) {
+    m_addUnlockerToSharers;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(BB_S, LastCoreUnblock, S) {
+    m_addUnlockerToSharers;
+    dt_deallocateTBE;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(O, {RdBlk, RdBlkS}, BBO_O){TagArrayRead} {
+    t_allocateTBE;
+    pso_probeSharedDataOwner;
+    q_addOutstandingMergedSharer;
+    i_popIncomingRequestQueue;
+  }
+  // Merging of read sharing into a single request
+  transition(BBO_O, {RdBlk, RdBlkS}) {
+    q_addOutstandingMergedSharer;
+    i_popIncomingRequestQueue;
+  }
+
+  // Wait for probe acks to be complete
+  transition(BBO_O, CPUPrbResp) {
+    ccr_copyCoreResponseToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+
+  transition(BBO_O, TCCPrbResp) {
+    ctr_copyTCCResponseToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+
+  // Window for merging complete with this transition
+  // Send responses to all outstanding
+  transition(BBO_O, ProbeAcksComplete, BB_OO) {
+    sCS_sendCollectiveResponseS;
+    pt_popTriggerQueue;
+  }
+
+  transition(BB_OO, CoreUnblock) {
+    m_addUnlockerToSharers;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(BB_OO, LastCoreUnblock, O){TagArrayWrite} {
+    m_addUnlockerToSharers;
+    dt_deallocateTBE;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(S, CPUWrite, BW_S){TagArrayRead} {
+    t_allocateTBE;
+    rC_removeCoreFromSharers;
+    sT_sendRequestToTCC;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(E, CPUWrite, BW_E){TagArrayRead} {
+    t_allocateTBE;
+    rC_removeCoreFromSharers;
+    sT_sendRequestToTCC;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(O, CPUWrite, BW_O){TagArrayRead} {
+    t_allocateTBE;
+    rCo_removeCoreFromOwner;
+    rC_removeCoreFromSharers;
+    sT_sendRequestToTCC;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(M, CPUWrite, BW_M){TagArrayRead} {
+    t_allocateTBE;
+    rCo_removeCoreFromOwner;
+    rC_removeCoreFromSharers;
+    sT_sendRequestToTCC;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(BW_S, TCCUnblock_Sharer, S){TagArrayWrite} {
+    aT_addTCCToSharers;
+    dt_deallocateTBE;
+    plu_popTCCUnblockQueue;
+  }
+
+  transition(BW_S, TCCUnblock_NotValid, S){TagArrayWrite} {
+    dt_deallocateTBE;
+    plu_popTCCUnblockQueue;
+  }
+
+  transition(BW_E, TCCUnblock, E){TagArrayWrite} {
+    cc_clearSharers;
+    aT_addTCCToSharers;
+    dt_deallocateTBE;
+    plu_popTCCUnblockQueue;
+  }
+
+  transition(BW_E, TCCUnblock_NotValid, E) {
+    dt_deallocateTBE;
+    plu_popTCCUnblockQueue;
+  }
+
+  transition(BW_M, TCCUnblock, M) {
+    c_clearOwner;
+    cc_clearSharers;
+    eT_ownerIsUnblocker;
+    dt_deallocateTBE;
+    plu_popTCCUnblockQueue;
+  }
+
+  transition(BW_M, TCCUnblock_NotValid, M) {
+    // Note this transition should only be executed if we received a stale wb
+    dt_deallocateTBE;
+    plu_popTCCUnblockQueue;
+  }
+
+  transition(BW_O, TCCUnblock, O) {
+    c_clearOwner;
+    eT_ownerIsUnblocker;
+    dt_deallocateTBE;
+    plu_popTCCUnblockQueue;
+  }
+
+  transition(BW_O, TCCUnblock_NotValid, O) {
+    // Note this transition should only be executed if we received a stale wb
+    dt_deallocateTBE;
+    plu_popTCCUnblockQueue;
+  }
+
+  // We lost the owner likely do to an invalidation racing with a 'O' wb
+  transition(BW_O, TCCUnblock_Sharer, S) {
+    c_clearOwner;
+    aT_addTCCToSharers;
+    dt_deallocateTBE;
+    plu_popTCCUnblockQueue;
+  }
+
+  transition({BW_M, BW_S, BW_E, BW_O}, {PrbInv,PrbInvData,PrbShrData}) {
+    ww_recycleProbeRequest;
+  }
+
+  transition(BRWD_I, {PrbInvData, PrbInv, PrbShrData}) {
+    ww_recycleProbeRequest;
+  }
+
+  // Three step process: locally invalidate others, issue CtoD, wait for NB_AckCtoD
+  transition(S, CtoD, BBS_UM) {TagArrayRead} {
+    t_allocateTBE;
+    lpc_probeInvCore;
+    i2_probeInvL2;
+    o_checkForAckCompletion;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(BBS_UM, CPUPrbResp, BBS_UM) {
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+
+  transition(BBS_UM, TCCPrbResp) {
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+
+  transition(BBS_UM, ProbeAcksComplete, S_M) {
+    rU_rememberUpgrade;
+    nM_issueRdBlkM;
+    pt_popTriggerQueue;
+  }
+
+  // Three step process: locally invalidate others, issue CtoD, wait for NB_AckCtoD
+  transition(O, CtoD, BBO_UM){TagArrayRead} {
+    t_allocateTBE;
+    lpc_probeInvCore;
+    i2_probeInvL2;
+    o_checkForAckCompletion;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(BBO_UM, CPUPrbResp, BBO_UM) {
+    ruo_rememberUntransferredOwner;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+
+  transition(BBO_UM, TCCPrbResp) {
+    ruoT_rememberUntransferredOwnerTCC;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+
+  transition(BBO_UM, ProbeAcksComplete, O_M) {
+    rU_rememberUpgrade;
+    nM_issueRdBlkM;
+    pt_popTriggerQueue;
+  }
+
+  transition({S,E}, RdBlkM, BBS_M){TagArrayWrite} {
+    t_allocateTBE;
+    ldc_probeInvCoreData;
+    ld2_probeInvL2Data;
+    o_checkForAckCompletion;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(BBS_M, CPUPrbResp) {
+    ccr_copyCoreResponseToTBE;
+    rR_removeResponderFromSharers;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+
+  transition(BBS_M, TCCPrbResp) {
+    ctr_copyTCCResponseToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+
+  transition(BBS_M, ProbeAcksComplete, S_M) {
+    nM_issueRdBlkM;
+    pt_popTriggerQueue;
+  }
+
+  transition(O, RdBlkM, BBO_M){TagArrayRead} {
+    t_allocateTBE;
+    ldc_probeInvCoreData;
+    ld2_probeInvL2Data;
+    o_checkForAckCompletion;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(BBO_M, CPUPrbResp) {
+    ccr_copyCoreResponseToTBE;
+    rR_removeResponderFromSharers;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+
+  transition(BBO_M, TCCPrbResp) {
+    ctr_copyTCCResponseToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+
+  transition(BBO_M, ProbeAcksComplete, O_M) {
+    nM_issueRdBlkM;
+    pt_popTriggerQueue;
+  }
+
+  //
+  transition(M, RdBlkM, BBM_M){TagArrayRead} {
+    t_allocateTBE;
+    ldc_probeInvCoreData;
+    ld2_probeInvL2Data;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(BBM_M, CPUPrbResp) {
+    ccr_copyCoreResponseToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+
+  // TCP recalled block before receiving probe
+  transition({BBM_M, BBS_M, BBO_M}, {CPUWrite,NoCPUWrite}) {
+    zz_recycleRequest;
+  }
+
+  transition(BBM_M, TCCPrbResp) {
+    ctr_copyTCCResponseToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+
+  transition(BBM_M, ProbeAcksComplete, BB_M) {
+    sM_sendResponseM;
+    pt_popTriggerQueue;
+  }
+
+  transition(BB_M, CoreUnblock, M){TagArrayWrite} {
+    e_ownerIsUnblocker;
+    dt_deallocateTBE;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(M, {RdBlkS, RdBlk}, BBM_O){TagArrayRead} {
+    t_allocateTBE;
+    sc_probeShrCoreData;
+    s2_probeShrL2Data;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(E, {RdBlkS, RdBlk}, BBM_O){TagArrayRead} {
+    t_allocateTBE;
+    eto_moveExSharerToOwner;
+    sc_probeShrCoreData;
+    s2_probeShrL2Data;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(BBM_O, CPUPrbResp) {
+    ccr_copyCoreResponseToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+  transition(BBM_O, TCCPrbResp) {
+    ctr_copyTCCResponseToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+  transition(BBM_O, ProbeAcksComplete, BB_O) {
+    sS_sendResponseS;
+    pt_popTriggerQueue;
+  }
+
+  transition(BB_O, CoreUnblock, O){TagArrayWrite} {
+    as_addToSharers;
+    dt_deallocateTBE;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition({BBO_O, BBM_M, BBS_S, BBM_O, BB_M, BB_O, BB_S, BBO_UM, BBS_UM, BBS_M, BBO_M, BB_OO}, {PrbInvData, PrbInv,PrbShrData}) {
+    ww_recycleProbeRequest;
+  }
+
+  transition({BBM_O, BBS_S, CP_S, CP_O, CP_SM, CP_OM, BBO_O}, {CPUWrite,NoCPUWrite}) {
+    zz_recycleRequest;
+  }
+
+  // stale CtoD raced with external invalidation
+  transition({I, CP_I, B_I, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, CtoD) {
+    i_popIncomingRequestQueue;
+  }
+
+  // stale CtoD raced with internal RdBlkM
+  transition({BBM_M, BBS_M, BBO_M, BBB_M, BBS_UM, BBO_UM}, CtoD) {
+    i_popIncomingRequestQueue;
+  }
+
+  transition({E, M}, CtoD) {
+    i_popIncomingRequestQueue;
+  }
+
+
+  // TCC-directory has sent out (And potentially received acks for) probes.
+  // TCP/SQC replacement (known to be stale subsequent) are popped off.
+  transition({BBO_UM, BBS_UM}, {CPUWrite,NoCPUWrite}) {
+    nC_sendNullWBAckToCore;
+    i_popIncomingRequestQueue;
+  }
+
+  transition(S_M, {NoCPUWrite, CPUWrite}) {
+    zz_recycleRequest;
+  }
+
+  transition(O_M, {NoCPUWrite, CPUWrite}) {
+    zz_recycleRequest;
+  }
+
+
+  transition({BBM_M, BBS_M, BBO_M, BBO_UM, BBS_UM}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
+    nT_sendNullWBAckToTCC;
+    pl_popTCCRequestQueue;
+  }
+
+  transition({CP_S, CP_O, CP_OM, CP_SM}, {VicDirty, VicClean, VicDirtyLast, CancelWB, NoVic}) {
+    yy_recycleTCCRequestQueue;
+  }
+
+  // However, when TCCdir has sent out PrbSharedData, one cannot ignore.
+  transition({BBS_S, BBO_O, BBM_O, S_M, O_M, BBB_M, BBB_S, BBB_E}, {VicDirty, VicClean, VicDirtyLast,CancelWB}) {
+    yy_recycleTCCRequestQueue;
+  }
+
+  transition({BW_S,BW_E,BW_O, BW_M}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
+    yy_recycleTCCRequestQueue;
+  }
+
+  transition({BW_S,BW_E,BW_O, BW_M}, CancelWB) {
+   nT_sendNullWBAckToTCC;
+   pl_popTCCRequestQueue;
+  }
+
+
+  /// recycle if waiting for unblocks.
+  transition({BB_M,BB_O,BB_S,BB_OO}, {VicDirty, VicClean, VicDirtyLast,NoVic,CancelWB}) {
+    yy_recycleTCCRequestQueue;
+  }
+
+  transition({BBS_S, BBO_O}, NoVic) {
+   rT_removeTCCFromSharers;
+   nT_sendNullWBAckToTCC;
+   pl_popTCCRequestQueue;
+  }
+
+  // stale. Pop message and send dummy ack.
+  transition({I_S, I_ES, I_M}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
+    nT_sendNullWBAckToTCC;
+    pl_popTCCRequestQueue;
+  }
+
+  transition(M,  VicDirtyLast, VM_I){TagArrayRead} {
+    tv_allocateTBE;
+    vd_victim;
+    pl_popTCCRequestQueue;
+  }
+
+  transition(E,  VicDirty, VM_I){TagArrayRead} {
+    tv_allocateTBE;
+    vd_victim;
+    pl_popTCCRequestQueue;
+  }
+
+  transition(O, VicDirty, VO_S){TagArrayRead} {
+    tv_allocateTBE;
+    vd_victim;
+    pl_popTCCRequestQueue;
+  }
+
+  transition(O, {VicDirtyLast, VicClean}, VO_I){TagArrayRead} {
+    tv_allocateTBE;
+    vd_victim;
+    pl_popTCCRequestQueue;
+  }
+
+  transition({E, S}, VicClean, VES_I){TagArrayRead} {
+    tv_allocateTBE;
+    vc_victim;
+    pl_popTCCRequestQueue;
+  }
+
+  transition({O, S}, NoVic){TagArrayRead} {
+    rT_removeTCCFromSharers;
+    nT_sendNullWBAckToTCC;
+    pl_popTCCRequestQueue;
+  }
+
+  transition({O,S}, NoCPUWrite){TagArrayRead} {
+    rC_removeCoreFromSharers;
+    nC_sendNullWBAckToCore;
+    i_popIncomingRequestQueue;
+  }
+
+  transition({M,E}, NoCPUWrite){TagArrayRead} {
+    rC_removeCoreFromSharers;
+    nC_sendNullWBAckToCore;
+    i_popIncomingRequestQueue;
+  }
+
+  // This can only happen if it is  race. (TCCdir sent out probes which caused this cancel in the first place.)
+  transition({VM_I, VES_I, VO_I}, CancelWB) {
+    pl_popTCCRequestQueue;
+  }
+
+  transition({VM_I, VES_I, VO_I}, NB_AckWB, I){TagArrayWrite} {
+    c_clearOwner;
+    cc_clearSharers;
+    wb_data;
+    fw2_forwardWBAck;
+    dt_deallocateTBE;
+    dd_deallocateDir;
+    pR_popResponseFromNBQueue;
+  }
+
+  transition(VO_S, NB_AckWB, S){TagArrayWrite}  {
+    c_clearOwner;
+    wb_data;
+    fw2_forwardWBAck;
+    dt_deallocateTBE;
+    pR_popResponseFromNBQueue;
+  }
+
+  transition(I_C, NB_AckWB, I){TagArrayWrite}  {
+    c_clearOwner;
+    cc_clearSharers;
+    ss_sendStaleNotification;
+    fw2_forwardWBAck;
+    dt_deallocateTBE;
+    dd_deallocateDir;
+    pR_popResponseFromNBQueue;
+  }
+
+  transition(I_W, NB_AckWB, I) {
+    ss_sendStaleNotification;
+    dt_deallocateTBE;
+    dd_deallocateDir;
+    pR_popResponseFromNBQueue;
+  }
+
+
+
+  // Do not handle replacements, reads of any kind or writebacks from transients; recycle
+  transition({I_M, I_ES, I_S, MO_I, ES_I, S_M, O_M, VES_I, VO_I, VO_S, VM_I, I_C, I_W}, {RdBlkS,RdBlkM,RdBlk,CtoD}) {
+    zz_recycleRequest;
+  }
+
+  transition( VO_S, NoCPUWrite) {
+    zz_recycleRequest;
+  }
+
+  transition({BW_M, BW_S, BW_O, BW_E}, {RdBlkS,RdBlkM,RdBlk,CtoD,NoCPUWrite, CPUWrite}) {
+    zz_recycleRequest;
+  }
+
+  transition({BBB_M, BBB_S, BBB_E, BB_O, BB_M, BB_S, BB_OO}, { RdBlk, RdBlkS, RdBlkM, CPUWrite, NoCPUWrite}) {
+    zz_recycleRequest;
+  }
+
+  transition({BBB_S, BBB_E, BB_O, BB_S, BB_OO}, { CtoD}) {
+    zz_recycleRequest;
+  }
+
+  transition({BBS_UM, BBO_UM, BBM_M, BBM_O, BBS_M, BBO_M}, { RdBlk, RdBlkS, RdBlkM}) {
+    zz_recycleRequest;
+  }
+
+  transition(BBM_O, CtoD) {
+    zz_recycleRequest;
+  }
+
+  transition({BBS_S, BBO_O}, {RdBlkM, CtoD}) {
+    zz_recycleRequest;
+  }
+
+  transition({B_I, CP_I, CP_S, CP_O, CP_OM, CP_SM, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, {RdBlk, RdBlkS, RdBlkM}) {
+    zz_recycleRequest;
+  }
+
+  transition({CP_O, CP_S, CP_OM}, CtoD) {
+    zz_recycleRequest;
+  }
+
+  // Ignore replacement related messages after probe got in.
+  transition({CP_I, B_I, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, {CPUWrite, NoCPUWrite}) {
+    zz_recycleRequest;
+  }
+
+  // Ignore replacement related messages after probes processed
+  transition({I, I_S, I_ES, I_M, I_C, I_W}, {CPUWrite,NoCPUWrite}) {
+      nC_sendNullWBAckToCore;
+    i_popIncomingRequestQueue;
+  }
+  // cannot ignore cancel... otherwise TCP/SQC will be stuck in I_C
+  transition({I, I_S, I_ES, I_M, I_C, I_W, S_M, M, O, E, S}, CPUWriteCancel){TagArrayRead}  {
+    nC_sendNullWBAckToCore;
+    i_popIncomingRequestQueue;
+  }
+
+  transition({CP_I, B_I, CP_IOM, CP_ISM, BRWD_I, BRW_I, BRD_I}, {NoVic, VicClean, VicDirty, VicDirtyLast}){
+    nT_sendNullWBAckToTCC;
+    pl_popTCCRequestQueue;
+  }
+
+  // Handling Probes from NB (General process: (1) propagate up, go to blocking state (2) process acks (3) on last ack downward.)
+
+  // step 1
+  transition({M, O, E, S}, PrbInvData, CP_I){TagArrayRead} {
+    tp_allocateTBE;
+    dc_probeInvCoreData;
+    d2_probeInvL2Data;
+    pp_popProbeQueue;
+  }
+  // step 2a
+  transition(CP_I, CPUPrbResp) {
+    y_writeDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+  // step 2b
+  transition(CP_I, TCCPrbResp) {
+    ty_writeTCCDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+  // step 3
+  transition(CP_I, ProbeAcksComplete, I){TagArrayWrite} {
+    pd_sendProbeResponseData;
+    c_clearOwner;
+    cc_clearSharers;
+    dt_deallocateTBE;
+    dd_deallocateDir;
+    pt_popTriggerQueue;
+  }
+
+  // step 1
+  transition({M, O, E, S}, PrbInv, B_I){TagArrayWrite} {
+    tp_allocateTBE;
+    ipc_probeInvCore;
+    i2_probeInvL2;
+    pp_popProbeQueue;
+  }
+  // step 2
+  transition(B_I, CPUPrbResp) {
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+  // step 2b
+  transition(B_I, TCCPrbResp) {
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+  // step 3
+  transition(B_I, ProbeAcksComplete, I){TagArrayWrite} {
+    // send response down to NB
+    pi_sendProbeResponseInv;
+    c_clearOwner;
+    cc_clearSharers;
+    dt_deallocateTBE;
+    dd_deallocateDir;
+    pt_popTriggerQueue;
+  }
+
+
+  // step 1
+  transition({M, O}, PrbShrData, CP_O){TagArrayRead} {
+    tp_allocateTBE;
+    sc_probeShrCoreData;
+    s2_probeShrL2Data;
+    pp_popProbeQueue;
+  }
+
+  transition(E, PrbShrData, CP_O){TagArrayRead} {
+    tp_allocateTBE;
+    eto_moveExSharerToOwner;
+    sc_probeShrCoreData;
+    s2_probeShrL2Data;
+    pp_popProbeQueue;
+  }
+  // step 2
+  transition(CP_O, CPUPrbResp) {
+    y_writeDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+  // step 2b
+  transition(CP_O, TCCPrbResp) {
+    ty_writeTCCDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+  // step 3
+  transition(CP_O, ProbeAcksComplete, O){TagArrayWrite} {
+    // send response down to NB
+    pd_sendProbeResponseData;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  //step 1
+  transition(S, PrbShrData, CP_S) {
+    tp_allocateTBE;
+    sc_probeShrCoreData;
+    s2_probeShrL2Data;
+    pp_popProbeQueue;
+  }
+  // step 2
+  transition(CP_S, CPUPrbResp) {
+    y_writeDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+  // step 2b
+  transition(CP_S, TCCPrbResp) {
+    ty_writeTCCDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+  // step 3
+  transition(CP_S, ProbeAcksComplete, S) {
+    // send response down to NB
+    pd_sendProbeResponseData;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  // step 1
+  transition(O_M, PrbInvData, CP_IOM) {
+    dc_probeInvCoreData;
+    d2_probeInvL2Data;
+    pp_popProbeQueue;
+  }
+  // step 2a
+  transition(CP_IOM, CPUPrbResp) {
+    y_writeDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+  // step 2b
+  transition(CP_IOM, TCCPrbResp) {
+    ty_writeTCCDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+  // step 3
+  transition(CP_IOM, ProbeAcksComplete, I_M) {
+    pdm_sendProbeResponseDataMs;
+    c_clearOwner;
+    cc_clearSharers;
+    cd_clearDirtyBitTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(CP_IOM, ProbeAcksCompleteReissue, I){TagArrayWrite} {
+    pdm_sendProbeResponseDataMs;
+    c_clearOwner;
+    cc_clearSharers;
+    dt_deallocateTBE;
+    dd_deallocateDir;
+    pt_popTriggerQueue;
+  }
+
+  // step 1
+  transition(S_M, PrbInvData, CP_ISM) {
+    dc_probeInvCoreData;
+    d2_probeInvL2Data;
+    o_checkForAckCompletion;
+    pp_popProbeQueue;
+  }
+  // step 2a
+  transition(CP_ISM, CPUPrbResp) {
+    y_writeDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+  // step 2b
+  transition(CP_ISM, TCCPrbResp) {
+    ty_writeTCCDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+  // step 3
+  transition(CP_ISM, ProbeAcksComplete, I_M) {
+    pdm_sendProbeResponseDataMs;
+    c_clearOwner;
+    cc_clearSharers;
+    cd_clearDirtyBitTBE;
+
+    //dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+  transition(CP_ISM, ProbeAcksCompleteReissue, I){TagArrayWrite} {
+    pim_sendProbeResponseInvMs;
+    c_clearOwner;
+    cc_clearSharers;
+    dt_deallocateTBE;
+    dd_deallocateDir;
+    pt_popTriggerQueue;
+  }
+
+  // step 1
+  transition({S_M, O_M}, {PrbInv}, CP_ISM) {
+    dc_probeInvCoreData;
+    d2_probeInvL2Data;
+    pp_popProbeQueue;
+  }
+  // next steps inherited from BS_ISM
+
+  // Simpler cases
+
+  transition({I_C, I_W}, {PrbInvData, PrbInv, PrbShrData}) {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  //If the directory is certain that the block is not present, one can send an acknowledgement right away.
+  // No need for three step process.
+  transition(I, {PrbInv,PrbShrData,PrbInvData}){TagArrayRead} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition({I_M, I_ES, I_S}, {PrbInv, PrbInvData}) {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition({I_M, I_ES, I_S}, PrbShrData) {
+    prm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  //step 1
+  transition(S_M, PrbShrData, CP_SM) {
+    sc_probeShrCoreData;
+    s2_probeShrL2Data;
+    o_checkForAckCompletion;
+    pp_popProbeQueue;
+  }
+  // step 2
+  transition(CP_SM, CPUPrbResp) {
+    y_writeDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+  // step 2b
+  transition(CP_SM, TCCPrbResp) {
+    ty_writeTCCDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+  // step 3
+  transition(CP_SM, {ProbeAcksComplete,ProbeAcksCompleteReissue}, S_M){DataArrayRead} {
+    // send response down to NB
+    pd_sendProbeResponseData;
+    pt_popTriggerQueue;
+  }
+
+  //step 1
+  transition(O_M, PrbShrData, CP_OM) {
+    sc_probeShrCoreData;
+    s2_probeShrL2Data;
+    pp_popProbeQueue;
+  }
+  // step 2
+  transition(CP_OM, CPUPrbResp) {
+    y_writeDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+  // step 2b
+  transition(CP_OM, TCCPrbResp) {
+    ty_writeTCCDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+  // step 3
+  transition(CP_OM, {ProbeAcksComplete,ProbeAcksCompleteReissue}, O_M) {
+    // send response down to NB
+    pd_sendProbeResponseData;
+    pt_popTriggerQueue;
+  }
+
+  transition(BRW_I, PrbInvData, I_W) {
+     pd_sendProbeResponseData;
+     pp_popProbeQueue;
+   }
+
+  transition({VM_I,VO_I}, PrbInvData, I_C) {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition(VES_I, {PrbInvData,PrbInv}, I_C) {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition({VM_I, VO_I, BRW_I}, PrbInv, I_W) {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition({VM_I, VO_I, VO_S, VES_I, BRW_I}, PrbShrData) {
+    pd_sendProbeResponseData;
+    sf_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  transition(VO_S, PrbInvData, CP_OSIW) {
+    dc_probeInvCoreData;
+    d2_probeInvL2Data;
+    pp_popProbeQueue;
+  }
+
+  transition(CP_OSIW, TCCPrbResp) {
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+  transition(CP_OSIW, CPUPrbResp) {
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+
+  transition(CP_OSIW, ProbeAcksComplete, I_C) {
+    pd_sendProbeResponseData;
+    cd_clearDirtyBitTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition({I, S, E, O, M, CP_O, CP_S, CP_OM, CP_SM, CP_OSIW, BW_S, BW_E, BW_O, BW_M, I_M, I_ES, I_S, BBS_S, BBO_O, BBM_M, BBM_O, BB_M, BB_O, BB_OO, BB_S, BBS_M, BBO_M, BBO_UM, BBS_UM, S_M, O_M, BBB_S, BBB_M, BBB_E, VES_I, VM_I, VO_I, VO_S, ES_I, MO_I, I_C, I_W}, StaleVic) {
+      nT_sendNullWBAckToTCC;
+      pl_popTCCRequestQueue;
+  }
+
+  transition({CP_I, B_I, CP_IOM, CP_ISM, BRWD_I, BRW_I, BRD_I}, StaleVic) {
+      nT_sendNullWBAckToTCC;
+      pl_popTCCRequestQueue;
+  }
+
+  // Recall Transistions
+  // transient states still require the directory state
+  transition({M, O}, Recall, BRWD_I) {
+    tr_allocateTBE;
+    vd_victim;
+    dc_probeInvCoreData;
+    d2_probeInvL2Data;
+  }
+
+  transition({E, S}, Recall, BRWD_I) {
+    tr_allocateTBE;
+    vc_victim;
+    dc_probeInvCoreData;
+    d2_probeInvL2Data;
+  }
+
+  transition(I, Recall) {
+    dd_deallocateDir;
+  }
+
+  transition({BRWD_I, BRD_I}, CPUPrbResp) {
+    y_writeDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    pk_popResponseQueue;
+  }
+
+  transition({BRWD_I, BRD_I}, TCCPrbResp) {
+    ty_writeTCCDataToTBE;
+    x_decrementAcks;
+    o_checkForAckCompletion;
+    plr_popTCCResponseQueue;
+  }
+
+  transition(BRWD_I, NB_AckWB, BRD_I) {
+    pR_popResponseFromNBQueue;
+  }
+
+  transition(BRWD_I, ProbeAcksComplete, BRW_I) {
+    pt_popTriggerQueue;
+  }
+
+  transition(BRW_I, NB_AckWB, I) {
+    wb_data;
+    dt_deallocateTBE;
+    dd_deallocateDir;
+    pR_popResponseFromNBQueue;
+  }
+
+  transition(BRD_I, ProbeAcksComplete, I) {
+    wb_data;
+    dt_deallocateTBE;
+    dd_deallocateDir;
+    pt_popTriggerQueue;
+  }
+
+  // wait for stable state for Recall
+  transition({BRWD_I,BRD_I,BRW_I,CP_O, CP_S, CP_OM, CP_SM, CP_OSIW, BW_S, BW_E, BW_O, BW_M, I_M, I_ES, I_S, BBS_S, BBO_O, BBM_M, BBM_O, BB_M, BB_O, BB_OO, BB_S, BBS_M, BBO_M, BBO_UM, BBS_UM, S_M, O_M, BBB_S, BBB_M, BBB_E, VES_I, VM_I, VO_I, VO_S, ES_I, MO_I, I_C, I_W, CP_I}, Recall) {
+    zz_recycleRequest; // stall and wait would be for the wrong address
+    ut_updateTag; // try to find an easier recall
+  }
+
+}
diff --git a/src/mem/protocol/GPU_RfO-TCP.sm b/src/mem/protocol/GPU_RfO-TCP.sm
new file mode 100644
index 000000000..6cf9224a6
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO-TCP.sm
@@ -0,0 +1,1009 @@
+/*
+ * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
+ : GPUCoalescer* coalescer;
+   Sequencer* sequencer;
+   bool use_seq_not_coal;
+   CacheMemory * L1cache;
+   int TCC_select_num_bits;
+   Cycles issue_latency := 40;  // time to send data down to TCC
+   Cycles l2_hit_latency := 18;
+
+  MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request";
+  MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response";
+  MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";
+
+  MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request";
+  MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response";
+
+  MessageBuffer * mandatoryQueue;
+{
+  state_declaration(State, desc="TCP Cache States", default="TCP_State_I") {
+    I, AccessPermission:Invalid, desc="Invalid";
+    S, AccessPermission:Read_Only, desc="Shared";
+    E, AccessPermission:Read_Write, desc="Exclusive";
+    O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
+    M, AccessPermission:Read_Write, desc="Modified";
+
+    I_M, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+    I_ES, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+    S_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    O_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+
+    ES_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack";
+    MO_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for dirty WB ack";
+
+    MO_PI, AccessPermission:Read_Only, desc="L1 downgrade, waiting for CtoD ack (or ProbeInvalidateData)";
+
+    I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCC for canceled WB";
+  }
+
+  enumeration(Event, desc="TCP Events") {
+    // Core initiated
+    Load,           desc="Load";
+    Store,          desc="Store";
+
+    // TCC initiated
+    TCC_AckS,        desc="TCC Ack to Core Request";
+    TCC_AckE,        desc="TCC Ack to Core Request";
+    TCC_AckM,        desc="TCC Ack to Core Request";
+    TCC_AckCtoD,     desc="TCC Ack to Core Request";
+    TCC_AckWB,       desc="TCC Ack for clean WB";
+    TCC_NackWB,       desc="TCC Nack for clean WB";
+
+    // Mem sys initiated
+    Repl,           desc="Replacing block from cache";
+
+    // Probe Events
+    PrbInvData,         desc="probe, return O or M data";
+    PrbInv,             desc="probe, no need for data";
+    LocalPrbInv,             desc="local probe, no need for data";
+    PrbShrData,         desc="probe downgrade, return O or M data";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead,    desc="Read the data array";
+    DataArrayWrite,   desc="Write the data array";
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+  }
+
+
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState,           desc="cache state";
+    bool Dirty,                 desc="Is the data dirty (diff than memory)?";
+    DataBlock DataBlk,          desc="data for the block";
+    bool FromL2, default="false", desc="block just moved from L2";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,             desc="Transient state";
+    DataBlock DataBlk,       desc="data for the block, required for concurrent writebacks";
+    bool Dirty,              desc="Is the data dirty (different than memory)?";
+    int NumPendingMsgs,      desc="Number of acks/data messages that this processor is waiting for";
+    bool Shared,             desc="Victim hit by shared probe";
+   }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<TCP_TBE>", constructor="m_number_of_TBEs";
+  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  // Internal functions
+  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+    Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
+    return cache_entry;
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return tbe.DataBlk;
+    } else {
+      return getCacheEntry(addr).DataBlk;
+    }
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if(is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+      cache_entry.CacheState := state;
+    }
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return TCP_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return TCP_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  bool isValid(Addr addr) {
+      AccessPermission perm := getAccessPermission(addr);
+      if (perm == AccessPermission:NotPresent ||
+          perm == AccessPermission:Invalid ||
+          perm == AccessPermission:Busy) {
+          return false;
+      } else {
+          return true;
+      }
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(TCP_State_to_permission(state));
+    }
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+        L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+        L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+        L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+        L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+  MachineType getCoherenceType(MachineID myMachID,
+                                      MachineID senderMachID) {
+    if(myMachID == senderMachID) {
+        return MachineType:TCP;
+    } else if(machineIDToMachineType(senderMachID) == MachineType:TCP) {
+        return MachineType:L1Cache_wCC;
+    } else if(machineIDToMachineType(senderMachID) == MachineType:TCC) {
+        return MachineType:TCC;
+    } else {
+        return MachineType:TCCdir;
+    }
+  }
+
+  // Out Ports
+
+  out_port(requestNetwork_out, CPURequestMsg, requestFromTCP);
+  out_port(responseNetwork_out, ResponseMsg, responseFromTCP);
+  out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
+
+  // In Ports
+
+  in_port(probeNetwork_in, TDProbeRequestMsg, probeToTCP) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+     peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") {
+        DPRINTF(RubySlicc, "%s\n", in_msg);
+        DPRINTF(RubySlicc, "machineID: %s\n", machineID);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == ProbeRequestType:PrbInv) {
+          if (in_msg.ReturnData) {
+            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+          } else {
+            if(in_msg.localCtoD) {
+              trigger(Event:LocalPrbInv, in_msg.addr, cache_entry, tbe);
+            } else {
+              trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+            }
+          }
+        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+          assert(in_msg.ReturnData);
+          trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+  in_port(responseToTCP_in, ResponseMsg, responseToTCP) {
+    if (responseToTCP_in.isReady(clockEdge())) {
+      peek(responseToTCP_in, ResponseMsg, block_on="addr") {
+
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == CoherenceResponseType:TDSysResp) {
+          if (in_msg.State == CoherenceState:Modified) {
+            if (in_msg.CtoD) {
+              trigger(Event:TCC_AckCtoD, in_msg.addr, cache_entry, tbe);
+            } else {
+              trigger(Event:TCC_AckM, in_msg.addr, cache_entry, tbe);
+            }
+          } else if (in_msg.State == CoherenceState:Shared) {
+            trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.State == CoherenceState:Exclusive) {
+            trigger(Event:TCC_AckE, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
+          trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) {
+          trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+
+  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+    if (mandatoryQueue_in.isReady(clockEdge())) {
+      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+        Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+        TBE tbe := TBEs.lookup(in_msg.LineAddress);
+        DPRINTF(RubySlicc, "%s\n", in_msg);
+        if (in_msg.Type == RubyRequestType:LD) {
+          if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
+            trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
+          } else {
+            Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
+            trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        } else {
+          if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
+            trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
+          } else {
+            Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
+            trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        }
+      }
+    }
+  }
+
+  // Actions
+
+  action(ic_invCache, "ic", desc="invalidate cache") {
+    if(is_valid(cache_entry)) {
+      L1cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  action(n_issueRdBlk, "n", desc="Issue RdBlk") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlk;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkM;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(vd_victim, "vd", desc="Victimize M/O Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      assert(is_valid(cache_entry));
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicDirty;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:O) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+      out_msg.Dirty := cache_entry.Dirty;
+    }
+  }
+
+  action(vc_victim, "vc", desc="Victimize E/S Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicClean;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:S) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+    }
+  }
+
+  action(a_allocate, "a", desc="allocate block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L1cache.allocate(address, new Entry));
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    assert(is_valid(cache_entry));
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+    tbe.DataBlk := cache_entry.DataBlk;  // Data only used for WBs
+    tbe.Dirty := cache_entry.Dirty;
+    tbe.Shared := false;
+  }
+
+  action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+    mandatoryQueue_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+    responseToTCP_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  action(l_loadDone, "l", desc="local load done") {
+    assert(is_valid(cache_entry));
+    if (use_seq_not_coal) {
+        sequencer.readCallback(address, cache_entry.DataBlk,
+                               false, MachineType:TCP);
+    } else {
+        coalescer.readCallback(address, MachineType:TCP, cache_entry.DataBlk);
+    }
+  }
+
+  action(xl_loadDone, "xl", desc="remote load done") {
+    peek(responseToTCP_in, ResponseMsg) {
+      assert(is_valid(cache_entry));
+      if (use_seq_not_coal) {
+        coalescer.recordCPReadCallBack(machineID, in_msg.Sender);
+        sequencer.readCallback(address,
+                               cache_entry.DataBlk,
+                               false,
+                               machineIDToMachineType(in_msg.Sender),
+                               in_msg.InitialRequestTime,
+                               in_msg.ForwardRequestTime,
+                               in_msg.ProbeRequestStartTime);
+      } else {
+        MachineType cc_mach_type := getCoherenceType(machineID,
+                                                            in_msg.Sender);
+        coalescer.readCallback(address,
+                               cc_mach_type,
+                               cache_entry.DataBlk,
+                               in_msg.InitialRequestTime,
+                               in_msg.ForwardRequestTime,
+                               in_msg.ProbeRequestStartTime);
+      }
+    }
+  }
+
+  action(s_storeDone, "s", desc="local store done") {
+    assert(is_valid(cache_entry));
+    if (use_seq_not_coal) {
+      coalescer.recordCPWriteCallBack(machineID, machineID);
+      sequencer.writeCallback(address, cache_entry.DataBlk,
+                              false, MachineType:TCP);
+    } else {
+      coalescer.writeCallback(address, MachineType:TCP, cache_entry.DataBlk);
+    }
+    cache_entry.Dirty := true;
+  }
+
+  action(xs_storeDone, "xs", desc="remote store done") {
+    peek(responseToTCP_in, ResponseMsg) {
+      assert(is_valid(cache_entry));
+      if (use_seq_not_coal) {
+        coalescer.recordCPWriteCallBack(machineID, in_msg.Sender);
+        sequencer.writeCallback(address,
+                                cache_entry.DataBlk,
+                                false,
+                                machineIDToMachineType(in_msg.Sender),
+                                in_msg.InitialRequestTime,
+                                in_msg.ForwardRequestTime,
+                                in_msg.ProbeRequestStartTime);
+      } else {
+        MachineType cc_mach_type := getCoherenceType(machineID,
+                                                            in_msg.Sender);
+        coalescer.writeCallback(address,
+                                cc_mach_type,
+                                cache_entry.DataBlk,
+                                in_msg.InitialRequestTime,
+                                in_msg.ForwardRequestTime,
+                                in_msg.ProbeRequestStartTime);
+      }
+      cache_entry.Dirty := true;
+    }
+  }
+
+  action(w_writeCache, "w", desc="write data to cache") {
+    peek(responseToTCP_in, ResponseMsg) {
+      assert(is_valid(cache_entry));
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+    peek(responseToTCP_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:StaleNotif;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                TCC_select_low_bit, TCC_select_num_bits));
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(wb_data, "wb", desc="write back data") {
+    peek(responseToTCP_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:CPUData;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                TCC_select_low_bit, TCC_select_num_bits));
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.Dirty := tbe.Dirty;
+        if (tbe.Shared) {
+          out_msg.NbReqShared := true;
+        } else {
+          out_msg.NbReqShared := false;
+        }
+        out_msg.State := CoherenceState:Shared; // faux info
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(piu_sendProbeResponseInvUntransferredOwnership, "piu", desc="send probe ack inv, no data, retain ownership") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC, L3  respond in same way to probes
+      out_msg.Sender := machineID;
+      // will this always be ok? probably not for multisocket
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.UntransferredOwner :=true;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC, L3  respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+      out_msg.isValid := isValid(address);
+    }
+  }
+
+  action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and TCC respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.Dirty := false;
+      out_msg.Ntsl := true;
+      out_msg.Hit := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+      out_msg.isValid := isValid(address);
+    }
+  }
+
+  action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and TCC respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.Dirty := false;  // only true if sending back data i think
+      out_msg.Hit := false;
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+      out_msg.isValid := isValid(address);
+    }
+  }
+
+  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      assert(is_valid(cache_entry) || is_valid(tbe));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.DataBlk := getDataBlock(address);
+      if (is_valid(tbe)) {
+        out_msg.Dirty := tbe.Dirty;
+      } else {
+        out_msg.Dirty := cache_entry.Dirty;
+      }
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.isValid := isValid(address);
+      APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
+      APPEND_TRANSITION_COMMENT(out_msg.Dirty);
+    }
+  }
+
+  action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      assert(is_valid(cache_entry) || is_valid(tbe));
+      assert(is_valid(cache_entry));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.DataBlk := getDataBlock(address);
+      if (is_valid(tbe)) {
+        out_msg.Dirty := tbe.Dirty;
+      } else {
+        out_msg.Dirty := cache_entry.Dirty;
+      }
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.isValid := isValid(address);
+      APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
+      APPEND_TRANSITION_COMMENT(out_msg.Dirty);
+      DPRINTF(RubySlicc, "Data is %s\n", out_msg.DataBlk);
+    }
+  }
+
+  action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
+    assert(is_valid(tbe));
+    tbe.Shared := true;
+  }
+
+  action(mru_updateMRU, "mru", desc="Touch block for replacement policy") {
+    L1cache.setMRU(address);
+  }
+
+  action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+    enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      out_msg.wasValid := isValid(address);
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
+    probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
+    mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  // Transitions
+
+  // transitions from base
+  transition(I, Load, I_ES) {TagArrayRead} {
+    a_allocate;
+    n_issueRdBlk;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, Store, I_M) {TagArrayRead, TagArrayWrite} {
+    a_allocate;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition(S, Store, S_M) {TagArrayRead} {
+    mru_updateMRU;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition(E, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    mru_updateMRU;
+    s_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(O, Store, O_M) {TagArrayRead, DataArrayWrite} {
+    mru_updateMRU;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition(M, Store) {TagArrayRead, DataArrayWrite} {
+    mru_updateMRU;
+    s_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  // simple hit transitions
+  transition({S, E, O, M}, Load) {TagArrayRead, DataArrayRead} {
+    l_loadDone;
+    mru_updateMRU;
+    p_popMandatoryQueue;
+  }
+
+  // recycles from transients
+  transition({I_M, I_ES, ES_I, MO_I, S_M, O_M, MO_PI, I_C}, {Load, Store, Repl}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({S, E}, Repl, ES_I) {TagArrayRead} {
+    t_allocateTBE;
+    vc_victim;
+    ic_invCache;
+  }
+
+  transition({O, M}, Repl, MO_I) {TagArrayRead, DataArrayRead} {
+    t_allocateTBE;
+    vd_victim;
+    ic_invCache;
+  }
+
+  // TD event transitions
+  transition(I_M, {TCC_AckM, TCC_AckCtoD}, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    w_writeCache;
+    xs_storeDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_ES, TCC_AckS, S) {TagArrayWrite,  DataArrayWrite} {
+    w_writeCache;
+    xl_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_ES, TCC_AckE, E) {TagArrayWrite,  DataArrayWrite} {
+    w_writeCache;
+    xl_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition({S_M, O_M}, TCC_AckM, M) {TagArrayWrite, DataArrayWrite} {
+    xs_storeDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition({MO_I, ES_I}, TCC_NackWB, I){TagArrayWrite} {
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition({MO_I, ES_I}, TCC_AckWB, I) {TagArrayWrite, DataArrayRead} {
+    wb_data;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(I_C, TCC_AckWB, I) {TagArrayWrite} {
+    ss_sendStaleNotification;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(I_C, TCC_NackWB, I) {TagArrayWrite} {
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  // Probe transitions
+  transition({M, O}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
+    pd_sendProbeResponseData;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(I, PrbInvData) {TagArrayRead, TagArrayWrite} {
+    prm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  transition({E, S}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
+    pd_sendProbeResponseData;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbInvData, I_C) {} {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  // Needed for TCC-based protocols. Must hold on to ownership till transfer complete
+  transition({M, O}, LocalPrbInv, MO_PI){TagArrayRead, TagArrayWrite} {
+    piu_sendProbeResponseInvUntransferredOwnership;
+    pp_popProbeQueue;
+  }
+
+  // If there is a race and we see a probe invalidate, handle normally.
+  transition(MO_PI, PrbInvData, I){TagArrayWrite} {
+    pd_sendProbeResponseData;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_PI, PrbInv, I){TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  // normal exit when ownership is successfully transferred
+  transition(MO_PI, TCC_AckCtoD, I) {TagArrayWrite} {
+    ic_invCache;
+    pr_popResponseQueue;
+  }
+
+  transition({M, O, E, S, I}, PrbInv, I)  {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition({E, S, I}, LocalPrbInv, I){TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+
+  transition({M, E, O}, PrbShrData, O) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_PI, PrbShrData) {DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+
+  transition(S, PrbShrData, S) {TagArrayRead, DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({I, I_C}, PrbShrData) {TagArrayRead} {
+    prm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbInv, I_C) {} {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition({I_M, I_ES}, {PrbInv, PrbInvData}){TagArrayRead} {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    a_allocate;  // but make sure there is room for incoming data when it arrives
+    pp_popProbeQueue;
+  }
+
+  transition({I_M, I_ES}, PrbShrData) {} {
+    prm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  transition(S_M, PrbInvData, I_M) {TagArrayRead} {
+    pim_sendProbeResponseInvMs;
+    ic_invCache;
+    a_allocate;
+    pp_popProbeQueue;
+  }
+
+  transition(O_M, PrbInvData, I_M) {TagArrayRead,DataArrayRead} {
+    pdm_sendProbeResponseDataMs;
+    ic_invCache;
+    a_allocate;
+    pp_popProbeQueue;
+  }
+
+  transition({S_M, O_M}, {PrbInv}, I_M) {TagArrayRead} {
+    pim_sendProbeResponseInvMs;
+    ic_invCache;
+    a_allocate;
+    pp_popProbeQueue;
+  }
+
+  transition(S_M, {LocalPrbInv}, I_M) {TagArrayRead} {
+    pim_sendProbeResponseInvMs;
+    ic_invCache;
+    a_allocate;
+    pp_popProbeQueue;
+  }
+
+  transition(O_M, LocalPrbInv, I_M) {TagArrayRead} {
+    piu_sendProbeResponseInvUntransferredOwnership;
+    ic_invCache;
+    a_allocate;
+    pp_popProbeQueue;
+  }
+
+  transition({S_M, O_M}, PrbShrData) {DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, PrbInvData, I_C){
+    pd_sendProbeResponseData;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInvData, I_C) {DataArrayRead} {
+    pd_sendProbeResponseData;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInv, I_C) {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, PrbInv, I_C) {
+    pi_sendProbeResponseInv;
+    ic_invCache;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, PrbShrData, ES_I) {DataArrayRead} {
+    pd_sendProbeResponseData;
+    sf_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbShrData, MO_I) {DataArrayRead} {
+    pd_sendProbeResponseData;
+    sf_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+}
diff --git a/src/mem/protocol/GPU_RfO.slicc b/src/mem/protocol/GPU_RfO.slicc
new file mode 100644
index 000000000..7773ce6e0
--- /dev/null
+++ b/src/mem/protocol/GPU_RfO.slicc
@@ -0,0 +1,11 @@
+protocol "GPU_AMD_Base";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-dir.sm";
+include "MOESI_AMD_Base-CorePair.sm";
+include "GPU_RfO-TCP.sm";
+include "GPU_RfO-SQC.sm";
+include "GPU_RfO-TCC.sm";
+include "GPU_RfO-TCCdir.sm";
+include "MOESI_AMD_Base-L3cache.sm";
+include "MOESI_AMD_Base-RegionBuffer.sm";
diff --git a/src/mem/protocol/GPU_VIPER-SQC.sm b/src/mem/protocol/GPU_VIPER-SQC.sm
new file mode 100644
index 000000000..8d5b5699a
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER-SQC.sm
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Blake Hechtman
+ */
+
+machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
+ : Sequencer* sequencer;
+   CacheMemory * L1cache;
+   int TCC_select_num_bits;
+   Cycles issue_latency := 80;  // time to send data down to TCC
+   Cycles l2_hit_latency := 18; // for 1MB L2, 20 for 2MB
+
+  MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request";
+
+  MessageBuffer * probeToSQC, network="From", virtual_network="1", vnet_type="request";
+  MessageBuffer * responseToSQC, network="From", virtual_network="3", vnet_type="response";
+
+  MessageBuffer * mandatoryQueue;
+{
+  state_declaration(State, desc="SQC Cache States", default="SQC_State_I") {
+    I, AccessPermission:Invalid, desc="Invalid";
+    V, AccessPermission:Read_Only, desc="Valid";
+  }
+
+  enumeration(Event, desc="SQC Events") {
+    // Core initiated
+    Fetch,          desc="Fetch";
+    // Mem sys initiated
+    Repl,           desc="Replacing block from cache";
+    Data,           desc="Received Data";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead,    desc="Read the data array";
+    DataArrayWrite,   desc="Write the data array";
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+  }
+
+
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState,           desc="cache state";
+    bool Dirty,                 desc="Is the data dirty (diff than memory)?";
+    DataBlock DataBlk,          desc="data for the block";
+    bool FromL2, default="false", desc="block just moved from L2";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,             desc="Transient state";
+    DataBlock DataBlk,       desc="data for the block, required for concurrent writebacks";
+    bool Dirty,              desc="Is the data dirty (different than memory)?";
+    int NumPendingMsgs,      desc="Number of acks/data messages that this processor is waiting for";
+    bool Shared,             desc="Victim hit by shared probe";
+   }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<SQC_TBE>", constructor="m_number_of_TBEs";
+  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  // Internal functions
+  Tick clockEdge();
+
+  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+    Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
+    return cache_entry;
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return tbe.DataBlk;
+    } else {
+      return getCacheEntry(addr).DataBlk;
+    }
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if(is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+      cache_entry.CacheState := state;
+    }
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes +
+        functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return SQC_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return SQC_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(SQC_State_to_permission(state));
+    }
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+        L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+        L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+        L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+        L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+  // Out Ports
+
+  out_port(requestNetwork_out, CPURequestMsg, requestFromSQC);
+
+  // In Ports
+
+  in_port(responseToSQC_in, ResponseMsg, responseToSQC) {
+    if (responseToSQC_in.isReady(clockEdge())) {
+      peek(responseToSQC_in, ResponseMsg, block_on="addr") {
+
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == CoherenceResponseType:TDSysResp) {
+          if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) {
+            trigger(Event:Data, in_msg.addr, cache_entry, tbe);
+          } else {
+            Addr victim := L1cache.cacheProbe(in_msg.addr);
+            trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        } else {
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+
+  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+    if (mandatoryQueue_in.isReady(clockEdge())) {
+      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+        Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+        TBE tbe := TBEs.lookup(in_msg.LineAddress);
+
+        assert(in_msg.Type == RubyRequestType:IFETCH);
+        trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe);
+      }
+    }
+  }
+
+  // Actions
+
+  action(ic_invCache, "ic", desc="invalidate cache") {
+    if(is_valid(cache_entry)) {
+      L1cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlk;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(a_allocate, "a", desc="allocate block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L1cache.allocate(address, new Entry));
+    }
+  }
+
+  action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+    mandatoryQueue_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+    responseToSQC_in.dequeue(clockEdge());
+  }
+
+  action(l_loadDone, "l", desc="local load done") {
+    assert(is_valid(cache_entry));
+    sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
+    APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
+  }
+
+  action(w_writeCache, "w", desc="write data to cache") {
+    peek(responseToSQC_in, ResponseMsg) {
+      assert(is_valid(cache_entry));
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := false;
+    }
+  }
+
+  // Transitions
+
+  // transitions from base
+  transition({I, V}, Repl, I) {TagArrayRead, TagArrayWrite} {
+    ic_invCache
+  }
+
+  transition(I, Data, V) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    a_allocate;
+    w_writeCache
+    l_loadDone;
+    pr_popResponseQueue;
+  }
+
+  transition(I, Fetch) {TagArrayRead, TagArrayWrite} {
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  // simple hit transitions
+  transition(V, Fetch) {TagArrayRead, DataArrayRead} {
+    l_loadDone;
+    p_popMandatoryQueue;
+  }
+}
diff --git a/src/mem/protocol/GPU_VIPER-TCC.sm b/src/mem/protocol/GPU_VIPER-TCC.sm
new file mode 100644
index 000000000..f62df9f4f
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER-TCC.sm
@@ -0,0 +1,739 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Blake Hechtman
+ */
+
+machine(MachineType:TCC, "TCC Cache")
+ : CacheMemory * L2cache;
+   bool WB; /*is this cache Writeback?*/
+   Cycles l2_request_latency := 50;
+   Cycles l2_response_latency := 20;
+
+  // From the TCPs or SQCs
+  MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request";
+  // To the Cores. TCC deals only with TCPs/SQCs.
+  MessageBuffer * responseToCore, network="To", virtual_network="3", vnet_type="response";
+  // From the NB
+  MessageBuffer * probeFromNB, network="From", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseFromNB, network="From", virtual_network="2", vnet_type="response";
+  // To the NB
+  MessageBuffer * requestToNB, network="To", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseToNB, network="To", virtual_network="2", vnet_type="response";
+  MessageBuffer * unblockToNB, network="To", virtual_network="4", vnet_type="unblock";
+
+  MessageBuffer * triggerQueue;
+
+{
+  // EVENTS
+  enumeration(Event, desc="TCC Events") {
+    // Requests coming from the Cores
+    RdBlk,                  desc="RdBlk event";
+    WrVicBlk,               desc="L1 Write Through";
+    WrVicBlkBack,           desc="L1 Write Through(dirty cache)";
+    Atomic,                 desc="Atomic Op";
+    AtomicDone,             desc="AtomicOps Complete";
+    AtomicNotDone,          desc="AtomicOps not Complete";
+    Data,                   desc="data messgae";
+    // Coming from this TCC
+    L2_Repl,                desc="L2 Replacement";
+    // Probes
+    PrbInv,                 desc="Invalidating probe";
+    // Coming from Memory Controller
+    WBAck,                  desc="writethrough ack from memory";
+  }
+
+  // STATES
+  state_declaration(State, desc="TCC State", default="TCC_State_I") {
+    M, AccessPermission:Read_Write, desc="Modified(dirty cache only)";
+    W, AccessPermission:Read_Write, desc="Written(dirty cache only)";
+    V, AccessPermission:Read_Only,  desc="Valid";
+    I, AccessPermission:Invalid,    desc="Invalid";
+    IV, AccessPermission:Busy,      desc="Waiting for Data";
+    WI, AccessPermission:Busy,      desc="Waiting on Writethrough Ack";
+    A, AccessPermission:Busy,       desc="Invalid waiting on atomici Data";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead,    desc="Read the data array";
+    DataArrayWrite,   desc="Write the data array";
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+  }
+
+
+  // STRUCTURES
+
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState,           desc="cache state";
+    bool Dirty,                 desc="Is the data dirty (diff from memory?)";
+    DataBlock DataBlk,          desc="Data for the block";
+    WriteMask writeMask,        desc="Dirty byte mask";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,     desc="Transient state";
+    DataBlock DataBlk,  desc="data for the block";
+    bool Dirty,         desc="Is the data dirty?";
+    bool Shared,        desc="Victim hit by shared probe";
+    MachineID From,     desc="Waiting for writeback from...";
+    NetDest Destination, desc="Data destination";
+    int numAtomics,     desc="number remaining atomics";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs";
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+
+
+  // FUNCTION DEFINITIONS
+  Tick clockEdge();
+
+  Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", L2cache.lookup(addr));
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    return getCacheEntry(addr).DataBlk;
+  }
+
+  bool presentOrAvail(Addr addr) {
+    return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+        tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+        cache_entry.CacheState := state;
+    }
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes +
+        functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return TCC_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return TCC_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(TCC_State_to_permission(state));
+    }
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+        L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+        L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+        L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+        L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+
+  // ** OUT_PORTS **
+
+  // Three classes of ports
+  // Class 1: downward facing network links to NB
+  out_port(requestToNB_out, CPURequestMsg, requestToNB);
+  out_port(responseToNB_out, ResponseMsg, responseToNB);
+  out_port(unblockToNB_out, UnblockMsg, unblockToNB);
+
+  // Class 2: upward facing ports to GPU cores
+  out_port(responseToCore_out, ResponseMsg, responseToCore);
+
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+  //
+  // request queue going to NB
+  //
+
+
+// ** IN_PORTS **
+  in_port(triggerQueue_in, TiggerMsg, triggerQueue) {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (tbe.numAtomics == 0) {
+            trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe);
+        } else {
+            trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+
+
+  in_port(responseFromNB_in, ResponseMsg, responseFromNB) {
+    if (responseFromNB_in.isReady(clockEdge())) {
+      peek(responseFromNB_in, ResponseMsg, block_on="addr") {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+          if(presentOrAvail(in_msg.addr)) {
+            trigger(Event:Data, in_msg.addr, cache_entry, tbe);
+          } else {
+            Addr victim :=  L2cache.cacheProbe(in_msg.addr);
+            trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+          trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+
+  // Finally handling incoming requests (from TCP) and probes (from NB).
+  in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, NBProbeRequestMsg) {
+        DPRINTF(RubySlicc, "%s\n", in_msg);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+      }
+    }
+  }
+
+  in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) {
+    if (coreRequestNetwork_in.isReady(clockEdge())) {
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+            if(WB) {
+                if(presentOrAvail(in_msg.addr)) {
+                    trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe);
+                } else {
+                    Addr victim :=  L2cache.cacheProbe(in_msg.addr);
+                    trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+                }
+            } else {
+                trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
+            }
+        } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+          trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
+          trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+        } else {
+          DPRINTF(RubySlicc, "%s\n", in_msg);
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+  // BEGIN ACTIONS
+
+  action(i_invL2, "i", desc="invalidate TCC cache block") {
+    if (is_valid(cache_entry)) {
+        L2cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  action(sd_sendData, "sd", desc="send Shared response") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := cache_entry.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.Dirty := false;
+        out_msg.State := CoherenceState:Shared;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+
+  action(sdr_sendDataResponse, "sdr", desc="send Shared response") {
+    enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:TDSysResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination := tbe.Destination;
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := false;
+      out_msg.State := CoherenceState:Shared;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+    enqueue(unblockToNB_out, UnblockMsg, 1) {
+      out_msg.addr := address;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+
+  action(rd_requestData, "r", desc="Miss in L2, pass on") {
+    if(tbe.Destination.count()==1){
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+          out_msg.addr := address;
+          out_msg.Type := in_msg.Type;
+          out_msg.Requestor := machineID;
+          out_msg.Destination.add(map_Address_to_Directory(address));
+          out_msg.Shared := false; // unneeded for this request
+          out_msg.MessageSize := in_msg.MessageSize;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+      }
+    }
+  }
+
+  action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+    peek(responseFromNB_in, ResponseMsg) {
+        enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:TDSysWBAck;
+          out_msg.Destination.clear();
+          out_msg.Destination.add(in_msg.WTRequestor);
+          out_msg.Sender := machineID;
+          out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        }
+    }
+  }
+
+  action(swb_sendWBAck, "swb", desc="send WB Ack") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysWBAck;
+        out_msg.Destination.clear();
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+      }
+    }
+  }
+
+  action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
+    peek(responseFromNB_in, ResponseMsg) {
+        enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:TDSysResp;
+          out_msg.Destination.add(in_msg.WTRequestor);
+          out_msg.Sender := machineID;
+          out_msg.MessageSize := in_msg.MessageSize;
+          out_msg.DataBlk := in_msg.DataBlk;
+        }
+    }
+  }
+
+  action(a_allocateBlock, "a", desc="allocate TCC block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L2cache.allocate(address, new Entry));
+      cache_entry.writeMask.clear();
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    if (is_invalid(tbe)) {
+      check_allocate(TBEs);
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      tbe.Destination.clear();
+      tbe.numAtomics := 0;
+    }
+    if (coreRequestNetwork_in.isReady(clockEdge())) {
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
+          tbe.Destination.add(in_msg.Requestor);
+        }
+      }
+    }
+  }
+
+  action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
+    tbe.Destination.clear();
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(wcb_writeCacheBlock, "wcb", desc="write data to TCC") {
+    peek(responseFromNB_in, ResponseMsg) {
+      cache_entry.DataBlk := in_msg.DataBlk;
+      DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+    }
+  }
+
+  action(wdb_writeDirtyBytes, "wdb", desc="write data to TCC") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      cache_entry.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
+      cache_entry.writeMask.orMask(in_msg.writeMask);
+      DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+    }
+  }
+
+  action(wt_writeThrough, "wt", desc="write back data") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+        out_msg.addr := address;
+        out_msg.Requestor := machineID;
+        out_msg.WTRequestor := in_msg.Requestor;
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.MessageSize := MessageSizeType:Data;
+        out_msg.Type := CoherenceRequestType:WriteThrough;
+        out_msg.Dirty := true;
+        out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.writeMask.orMask(in_msg.writeMask);
+      }
+    }
+  }
+
+  action(wb_writeBack, "wb", desc="write back data") {
+    enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      out_msg.WTRequestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Data;
+      out_msg.Type := CoherenceRequestType:WriteThrough;
+      out_msg.Dirty := true;
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.writeMask.orMask(cache_entry.writeMask);
+    }
+  }
+
+  action(at_atomicThrough, "at", desc="write back data") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+        out_msg.addr := address;
+        out_msg.Requestor := machineID;
+        out_msg.WTRequestor := in_msg.Requestor;
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.MessageSize := MessageSizeType:Data;
+        out_msg.Type := CoherenceRequestType:Atomic;
+        out_msg.Dirty := true;
+        out_msg.writeMask.orMask(in_msg.writeMask);
+      }
+    }
+  }
+
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(responseToNB_out, ResponseMsg, 1) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC, L3  respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+  action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+    L2cache.setMRU(address);
+  }
+
+  action(p_popRequestQueue, "p", desc="pop request queue") {
+    coreRequestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="pop response queue") {
+    responseFromNB_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  action(z_stall, "z", desc="stall") {
+      // built-in
+  }
+
+
+  action(ina_incrementNumAtomics, "ina", desc="inc num atomics") {
+    tbe.numAtomics := tbe.numAtomics + 1;
+  }
+
+
+  action(dna_decrementNumAtomics, "dna", desc="inc num atomics") {
+    tbe.numAtomics := tbe.numAtomics - 1;
+    if (tbe.numAtomics==0) {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:AtomicDone;
+      }
+    }
+  }
+
+  action(ptr_popTriggerQueue, "ptr", desc="pop Trigger") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  // END ACTIONS
+
+  // BEGIN TRANSITIONS
+  // transitions from base
+  // Assumptions for ArrayRead/Write
+  // TBE checked before tags
+  // Data Read/Write requires Tag Read
+
+  // Stalling transitions do NOT check the tag array...and if they do,
+  // they can cause a resource stall deadlock!
+
+  transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} {
+      z_stall;
+  }
+  transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) { //TagArrayRead} {
+      z_stall;
+  }
+  transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} {
+      z_stall;
+  }
+  transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} {
+    sd_sendData;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+  transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} {
+    t_allocateTBE;
+    wb_writeBack;
+  }
+
+  transition(I, RdBlk, IV) {TagArrayRead} {
+    t_allocateTBE;
+    rd_requestData;
+    p_popRequestQueue;
+  }
+
+  transition(IV, RdBlk) {
+    t_allocateTBE;
+    rd_requestData;
+    p_popRequestQueue;
+  }
+
+  transition({V, I},Atomic, A) {TagArrayRead} {
+    i_invL2;
+    t_allocateTBE;
+    at_atomicThrough;
+    ina_incrementNumAtomics;
+    p_popRequestQueue;
+  }
+
+  transition(A, Atomic) {
+    at_atomicThrough;
+    ina_incrementNumAtomics;
+    p_popRequestQueue;
+  }
+
+  transition({M, W}, Atomic, WI) {TagArrayRead} {
+    t_allocateTBE;
+    wb_writeBack;
+  }
+
+  transition(I, WrVicBlk) {TagArrayRead} {
+    wt_writeThrough;
+    p_popRequestQueue;
+  }
+
+  transition(V, WrVicBlk) {TagArrayRead, DataArrayWrite} {
+    ut_updateTag;
+    wdb_writeDirtyBytes;
+    wt_writeThrough;
+    p_popRequestQueue;
+  }
+
+  transition({V, M}, WrVicBlkBack, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    ut_updateTag;
+    swb_sendWBAck;
+    wdb_writeDirtyBytes;
+    p_popRequestQueue;
+  }
+
+  transition(W, WrVicBlkBack) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    ut_updateTag;
+    swb_sendWBAck;
+    wdb_writeDirtyBytes;
+    p_popRequestQueue;
+  }
+
+  transition(I, WrVicBlkBack, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocateBlock;
+    ut_updateTag;
+    swb_sendWBAck;
+    wdb_writeDirtyBytes;
+    p_popRequestQueue;
+  }
+
+  transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} {
+    t_allocateTBE;
+    wb_writeBack;
+    i_invL2;
+  }
+
+  transition({I, V}, L2_Repl, I) {TagArrayRead, TagArrayWrite} {
+    i_invL2;
+  }
+
+  transition({A, IV, WI}, L2_Repl) {
+    i_invL2;
+  }
+
+  transition({I, V}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(M, PrbInv, W) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(W, PrbInv) {TagArrayRead} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition({A, IV, WI}, PrbInv) {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocateBlock;
+    ut_updateTag;
+    wcb_writeCacheBlock;
+    sdr_sendDataResponse;
+    pr_popResponseQueue;
+    dt_deallocateTBE;
+  }
+
+  transition(A, Data) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocateBlock;
+    ar_sendAtomicResponse;
+    dna_decrementNumAtomics;
+    pr_popResponseQueue;
+  }
+
+  transition(A, AtomicDone, I) {TagArrayRead, TagArrayWrite} {
+    dt_deallocateTBE;
+    ptr_popTriggerQueue;
+  }
+
+  transition(A, AtomicNotDone) {TagArrayRead} {
+    ptr_popTriggerQueue;
+  }
+
+  //M,W should not see WBAck as the cache is in WB mode
+  //WBAcks do not need to check tags
+  transition({I, V, IV, A}, WBAck) {
+    w_sendResponseWBAck;
+    pr_popResponseQueue;
+  }
+
+  transition(WI, WBAck,I) {
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+}
diff --git a/src/mem/protocol/GPU_VIPER-TCP.sm b/src/mem/protocol/GPU_VIPER-TCP.sm
new file mode 100644
index 000000000..d81196b17
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER-TCP.sm
@@ -0,0 +1,747 @@
+/*
+ * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Blake Hechtman
+ */
+
+machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
+ : VIPERCoalescer* coalescer;
+   Sequencer* sequencer;
+   bool use_seq_not_coal;
+   CacheMemory * L1cache;
+   bool WB; /*is this cache Writeback?*/
+   bool disableL1; /* bypass L1 cache? */
+   int TCC_select_num_bits;
+   Cycles issue_latency := 40;  // time to send data down to TCC
+   Cycles l2_hit_latency := 18;
+
+  MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request";
+  MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response";
+  MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";
+
+  MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request";
+  MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response";
+  MessageBuffer * mandatoryQueue;
+
+{
+  state_declaration(State, desc="TCP Cache States", default="TCP_State_I") {
+    I, AccessPermission:Invalid, desc="Invalid";
+    V, AccessPermission:Read_Only, desc="Valid";
+    W, AccessPermission:Read_Write, desc="Written";
+    M, AccessPermission:Read_Write, desc="Written and Valid";
+    L, AccessPermission:Read_Write, desc="Local access is modifable";
+    A, AccessPermission:Invalid, desc="Waiting on Atomic";
+  }
+
+  enumeration(Event, desc="TCP Events") {
+    // Core initiated
+    Load,           desc="Load";
+    Store,          desc="Store to L1 (L1 is dirty)";
+    StoreThrough,   desc="Store directly to L2(L1 is clean)";
+    StoreLocal,     desc="Store to L1 but L1 is clean";
+    Atomic,         desc="Atomic";
+    Flush,          desc="Flush if dirty(wbL1 for Store Release)";
+    Evict,          desc="Evict if clean(invL1 for Load Acquire)";
+    // Mem sys initiated
+    Repl,           desc="Replacing block from cache";
+
+    // TCC initiated
+    TCC_Ack,        desc="TCC Ack to Core Request";
+    TCC_AckWB,      desc="TCC Ack for WB";
+    // Disable L1 cache
+    Bypass,         desc="Bypass the entire L1 cache";
+ }
+
+  enumeration(RequestType,
+              desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead,    desc="Read the data array";
+    DataArrayWrite,   desc="Write the data array";
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+    TagArrayFlash,    desc="Flash clear the data array";
+  }
+
+
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState,           desc="cache state";
+    bool Dirty,                 desc="Is the data dirty (diff than memory)?";
+    DataBlock DataBlk,          desc="data for the block";
+    bool FromL2, default="false", desc="block just moved from L2";
+    WriteMask writeMask, desc="written bytes masks";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,    desc="Transient state";
+    DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
+    bool Dirty,        desc="Is the data dirty (different than memory)?";
+    int NumPendingMsgs,desc="Number of acks/data messages that this processor is waiting for";
+    bool Shared,       desc="Victim hit by shared probe";
+   }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<TCP_TBE>", constructor="m_number_of_TBEs";
+  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+  int WTcnt, default="0";
+  int Fcnt, default="0";
+  bool inFlush, default="false";
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  // Internal functions
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+    Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
+    return cache_entry;
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return tbe.DataBlk;
+    } else {
+      return getCacheEntry(addr).DataBlk;
+    }
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+      cache_entry.CacheState := state;
+    }
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes +
+        functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return TCP_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return TCP_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  bool isValid(Addr addr) {
+      AccessPermission perm := getAccessPermission(addr);
+      if (perm == AccessPermission:NotPresent ||
+          perm == AccessPermission:Invalid ||
+          perm == AccessPermission:Busy) {
+          return false;
+      } else {
+          return true;
+      }
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(TCP_State_to_permission(state));
+    }
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+        L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+        L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+        L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:TagArrayFlash) {
+        L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+        L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayFlash) {
+      // FIXME should check once per cache, rather than once per cacheline
+      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+  // Out Ports
+
+  out_port(requestNetwork_out, CPURequestMsg, requestFromTCP);
+
+  // In Ports
+
+  in_port(responseToTCP_in, ResponseMsg, responseToTCP) {
+    if (responseToTCP_in.isReady(clockEdge())) {
+      peek(responseToTCP_in, ResponseMsg, block_on="addr") {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:TDSysResp) {
+          // disable L1 cache
+          if (disableL1) {
+	    trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
+          } else {
+            if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) {
+              trigger(Event:TCC_Ack, in_msg.addr, cache_entry, tbe);
+            } else {
+              Addr victim := L1cache.cacheProbe(in_msg.addr);
+              trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+            }
+          }
+        } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck ||
+                     in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+            trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
+          } else {
+            error("Unexpected Response Message to Core");
+          }
+      }
+    }
+  }
+
+  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+    if (mandatoryQueue_in.isReady(clockEdge())) {
+      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+        Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+        TBE tbe := TBEs.lookup(in_msg.LineAddress);
+        DPRINTF(RubySlicc, "%s\n", in_msg);
+        if (in_msg.Type == RubyRequestType:LD) {
+          trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
+        } else if (in_msg.Type == RubyRequestType:ATOMIC) {
+          trigger(Event:Atomic, in_msg.LineAddress, cache_entry, tbe);
+        } else if (in_msg.Type == RubyRequestType:ST) {
+          if(disableL1) {
+            trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe);
+          } else {
+            if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
+              if (in_msg.segment == HSASegment:SPILL) {
+                trigger(Event:StoreLocal, in_msg.LineAddress, cache_entry, tbe);
+              } else if (WB) {
+                trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
+              } else {
+                trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe);
+              }
+            } else {
+              Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
+              trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+            }
+          } // end if (disableL1)
+        } else if (in_msg.Type == RubyRequestType:FLUSH) {
+            trigger(Event:Flush, in_msg.LineAddress, cache_entry, tbe);
+        } else if (in_msg.Type == RubyRequestType:REPLACEMENT){
+            trigger(Event:Evict, in_msg.LineAddress, cache_entry, tbe);
+        } else {
+          error("Unexpected Request Message from VIC");
+          if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
+            if (WB) {
+                trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
+            } else {
+                trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe);
+            }
+          } else {
+            Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
+            trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        }
+      }
+    }
+  }
+
+  // Actions
+
+  action(ic_invCache, "ic", desc="invalidate cache") {
+    if(is_valid(cache_entry)) {
+      cache_entry.writeMask.clear();
+      L1cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  action(n_issueRdBlk, "n", desc="Issue RdBlk") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlk;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(rb_bypassDone, "rb", desc="bypass L1 of read access") {
+    peek(responseToTCP_in, ResponseMsg) {
+      DataBlock tmp:= in_msg.DataBlk;
+      if (use_seq_not_coal) {
+        sequencer.readCallback(address, tmp, false, MachineType:L1Cache);
+      } else {
+        coalescer.readCallback(address, MachineType:L1Cache, tmp);
+      }
+      if(is_valid(cache_entry)) {
+        unset_cache_entry();
+      }
+    }
+  }
+
+  action(wab_bypassDone, "wab", desc="bypass L1 of write access") {
+    peek(responseToTCP_in, ResponseMsg) {
+      DataBlock tmp := in_msg.DataBlk;
+      if (use_seq_not_coal) {
+        sequencer.writeCallback(address, tmp, false, MachineType:L1Cache);
+      } else {
+        coalescer.writeCallback(address, MachineType:L1Cache, tmp);
+      }
+    }
+  }
+
+  action(norl_issueRdBlkOrloadDone, "norl", desc="local load done") {
+    peek(mandatoryQueue_in, RubyRequest){
+      if (cache_entry.writeMask.cmpMask(in_msg.writeMask)) {
+          if (use_seq_not_coal) {
+            sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
+          } else {
+            coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
+          }
+      } else {
+        enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceRequestType:RdBlk;
+          out_msg.Requestor := machineID;
+          out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                              TCC_select_low_bit, TCC_select_num_bits));
+          out_msg.MessageSize := MessageSizeType:Request_Control;
+          out_msg.InitialRequestTime := curCycle();
+        }
+      }
+    }
+  }
+
+  action(wt_writeThrough, "wt", desc="Flush dirty data") {
+    WTcnt := WTcnt + 1;
+    APPEND_TRANSITION_COMMENT("write++ = ");
+    APPEND_TRANSITION_COMMENT(WTcnt);
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      assert(is_valid(cache_entry));
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.writeMask.clear();
+      out_msg.writeMask.orMask(cache_entry.writeMask);
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Data;
+      out_msg.Type := CoherenceRequestType:WriteThrough;
+      out_msg.InitialRequestTime := curCycle();
+      out_msg.Shared := false;
+    }
+  }
+
+  action(at_atomicThrough, "at", desc="send Atomic") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Requestor := machineID;
+        out_msg.writeMask.clear();
+        out_msg.writeMask.orMask(in_msg.writeMask);
+        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                TCC_select_low_bit, TCC_select_num_bits));
+        out_msg.MessageSize := MessageSizeType:Data;
+        out_msg.Type := CoherenceRequestType:Atomic;
+        out_msg.InitialRequestTime := curCycle();
+        out_msg.Shared := false;
+      }
+    }
+  }
+
+  action(a_allocate, "a", desc="allocate block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L1cache.allocate(address, new Entry));
+    }
+    cache_entry.writeMask.clear();
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+  }
+
+  action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(sf_setFlush, "sf", desc="set flush") {
+    inFlush := true;
+    APPEND_TRANSITION_COMMENT(" inFlush is true");
+  }
+
+  action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+    mandatoryQueue_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+    responseToTCP_in.dequeue(clockEdge());
+  }
+
+  action(l_loadDone, "l", desc="local load done") {
+    assert(is_valid(cache_entry));
+    if (use_seq_not_coal) {
+      sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
+    } else {
+      coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
+    }
+  }
+
+  action(s_storeDone, "s", desc="local store done") {
+    assert(is_valid(cache_entry));
+
+    if (use_seq_not_coal) {
+      sequencer.writeCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
+    } else {
+      coalescer.writeCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
+    }
+    cache_entry.Dirty := true;
+  }
+
+  action(inv_invDone, "inv", desc="local inv done") {
+    if (use_seq_not_coal) {
+        DPRINTF(RubySlicc, "Sequencer does not define invCallback!\n");
+        assert(false);
+    } else {
+      coalescer.invCallback(address);
+    }
+  }
+
+  action(wb_wbDone, "wb", desc="local wb done") {
+    if (inFlush == true) {
+      Fcnt := Fcnt + 1;
+      if (Fcnt > WTcnt) {
+        if (use_seq_not_coal) {
+            DPRINTF(RubySlicc, "Sequencer does not define wbCallback!\n");
+            assert(false);
+        } else {
+          coalescer.wbCallback(address);
+        }
+        Fcnt := Fcnt - 1;
+      }
+      if (WTcnt == 0 && Fcnt == 0) {
+        inFlush := false;
+        APPEND_TRANSITION_COMMENT(" inFlush is false");
+      }
+    }
+  }
+
+  action(wd_wtDone, "wd", desc="writethrough done") {
+    WTcnt := WTcnt - 1;
+    if (inFlush == true) {
+      Fcnt := Fcnt -1;
+    }
+    assert(WTcnt >= 0);
+    APPEND_TRANSITION_COMMENT("write-- = ");
+    APPEND_TRANSITION_COMMENT(WTcnt);
+  }
+
+  action(dw_dirtyWrite, "dw", desc="update write mask"){
+    peek(mandatoryQueue_in, RubyRequest) {
+      cache_entry.DataBlk.copyPartial(in_msg.WTData,in_msg.writeMask);
+      cache_entry.writeMask.orMask(in_msg.writeMask);
+    }
+  }
+  action(w_writeCache, "w", desc="write data to cache") {
+    peek(responseToTCP_in, ResponseMsg) {
+      assert(is_valid(cache_entry));
+      DataBlock tmp := in_msg.DataBlk;
+      tmp.copyPartial(cache_entry.DataBlk,cache_entry.writeMask);
+      cache_entry.DataBlk := tmp;
+    }
+  }
+
+  action(mru_updateMRU, "mru", desc="Touch block for replacement policy") {
+    L1cache.setMRU(address);
+  }
+
+//  action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
+//    mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+//  }
+
+  action(z_stall, "z", desc="stall; built-in") {
+      // built-int action
+  }
+
+  // Transitions
+  // ArrayRead/Write assumptions:
+  // All requests read Tag Array
+  // TBE allocation write the TagArray to I
+  // TBE only checked on misses
+  // Stores will also write dirty bits in the tag
+  // WriteThroughs still need to use cache entry as staging buffer for wavefront
+
+  // Stalling transitions do NOT check the tag array...and if they do,
+  // they can cause a resource stall deadlock!
+
+  transition({A}, {Load, Store, Atomic, StoreThrough}) { //TagArrayRead} {
+      z_stall;
+  }
+
+  transition({M, V, L}, Load) {TagArrayRead, DataArrayRead} {
+    l_loadDone;
+    mru_updateMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, Load) {TagArrayRead} {
+    n_issueRdBlk;
+    p_popMandatoryQueue;
+  }
+
+  transition({V, I}, Atomic, A) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    mru_updateMRU;
+    at_atomicThrough;
+    p_popMandatoryQueue;
+  }
+
+  transition({M, W}, Atomic, A) {TagArrayRead, TagArrayWrite} {
+    wt_writeThrough;
+    t_allocateTBE;
+    at_atomicThrough;
+    ic_invCache;
+  }
+
+  transition(W, Load, I) {TagArrayRead, DataArrayRead} {
+    wt_writeThrough;
+    norl_issueRdBlkOrloadDone;
+    p_popMandatoryQueue;
+  }
+
+  transition({I}, StoreLocal, L) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocate;
+    dw_dirtyWrite;
+    s_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition({L, V}, StoreLocal, L) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    dw_dirtyWrite;
+    mru_updateMRU;
+    s_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, Store, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocate;
+    dw_dirtyWrite;
+    s_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(V, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    dw_dirtyWrite;
+    mru_updateMRU;
+    s_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition({M, W}, Store) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    dw_dirtyWrite;
+    mru_updateMRU;
+    s_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  //M,W should not see storeThrough
+  transition(I, StoreThrough) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocate;
+    dw_dirtyWrite;
+    s_storeDone;
+    wt_writeThrough;
+    ic_invCache;
+    p_popMandatoryQueue;
+  }
+
+  transition({V,L}, StoreThrough, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    dw_dirtyWrite;
+    s_storeDone;
+    wt_writeThrough;
+    ic_invCache;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, TCC_Ack, V) {TagArrayRead, TagArrayWrite, DataArrayRead, DataArrayWrite} {
+    a_allocate;
+    w_writeCache;
+    l_loadDone;
+    pr_popResponseQueue;
+  }
+
+  transition(I, Bypass, I) {
+    rb_bypassDone;
+    pr_popResponseQueue;
+  }
+
+  transition(A, Bypass, I){
+    d_deallocateTBE;
+    wab_bypassDone;
+    pr_popResponseQueue;
+  }
+
+  transition(A, TCC_Ack, I) {TagArrayRead, DataArrayRead, DataArrayWrite} {
+    d_deallocateTBE;
+    a_allocate;
+    w_writeCache;
+    s_storeDone;
+    pr_popResponseQueue;
+    ic_invCache;
+  }
+
+  transition(V, TCC_Ack, V) {TagArrayRead, DataArrayRead, DataArrayWrite} {
+    w_writeCache;
+    l_loadDone;
+    pr_popResponseQueue;
+  }
+
+  transition({W, M}, TCC_Ack, M) {TagArrayRead, TagArrayWrite, DataArrayRead, DataArrayWrite} {
+    w_writeCache;
+    l_loadDone;
+    pr_popResponseQueue;
+  }
+
+  transition({I, V}, Repl, I) {TagArrayRead, TagArrayWrite} {
+    ic_invCache;
+  }
+
+  transition({A}, Repl) {TagArrayRead, TagArrayWrite} {
+    ic_invCache;
+  }
+
+  transition({W, M}, Repl, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    wt_writeThrough;
+    ic_invCache;
+  }
+
+  transition(L, Repl, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    wt_writeThrough;
+    ic_invCache;
+  }
+
+  transition({W, M}, Flush, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    sf_setFlush;
+    wt_writeThrough;
+    ic_invCache;
+    p_popMandatoryQueue;
+  }
+
+  transition({V, I, A, L},Flush) {TagArrayFlash} {
+    sf_setFlush;
+    wb_wbDone;
+    p_popMandatoryQueue;
+  }
+
+  transition({I, V}, Evict, I) {TagArrayFlash} {
+    inv_invDone;
+    p_popMandatoryQueue;
+    ic_invCache;
+  }
+
+  transition({W, M}, Evict, W) {TagArrayFlash} {
+    inv_invDone;
+    p_popMandatoryQueue;
+  }
+
+  transition({A, L}, Evict) {TagArrayFlash} {
+    inv_invDone;
+    p_popMandatoryQueue;
+  }
+
+  // TCC_AckWB only snoops TBE
+  transition({V, I, A, M, W, L}, TCC_AckWB) {
+    wd_wtDone;
+    wb_wbDone;
+    pr_popResponseQueue;
+  }
+}
diff --git a/src/mem/protocol/GPU_VIPER.slicc b/src/mem/protocol/GPU_VIPER.slicc
new file mode 100644
index 000000000..45f7f3477
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER.slicc
@@ -0,0 +1,9 @@
+protocol "GPU_VIPER";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-dir.sm";
+include "MOESI_AMD_Base-CorePair.sm";
+include "GPU_VIPER-TCP.sm";
+include "GPU_VIPER-SQC.sm";
+include "GPU_VIPER-TCC.sm";
+include "MOESI_AMD_Base-L3cache.sm";
diff --git a/src/mem/protocol/GPU_VIPER_Baseline.slicc b/src/mem/protocol/GPU_VIPER_Baseline.slicc
new file mode 100644
index 000000000..49bdce38c
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER_Baseline.slicc
@@ -0,0 +1,9 @@
+protocol "GPU_VIPER";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-probeFilter.sm";
+include "MOESI_AMD_Base-CorePair.sm";
+include "GPU_VIPER-TCP.sm";
+include "GPU_VIPER-SQC.sm";
+include "GPU_VIPER-TCC.sm";
+include "MOESI_AMD_Base-L3cache.sm";
diff --git a/src/mem/protocol/GPU_VIPER_Region-TCC.sm b/src/mem/protocol/GPU_VIPER_Region-TCC.sm
new file mode 100644
index 000000000..c3aef15a3
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER_Region-TCC.sm
@@ -0,0 +1,773 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor, Blake Hechtman
+ */
+
+/*
+ * This file is inherited from GPU_VIPER-TCC.sm and retains its structure.
+ * There are very few modifications in this file from the original VIPER TCC
+ */
+
+machine(MachineType:TCC, "TCC Cache")
+ : CacheMemory * L2cache;
+   bool WB; /*is this cache Writeback?*/
+   int regionBufferNum;
+   Cycles l2_request_latency := 50;
+   Cycles l2_response_latency := 20;
+
+  // From the TCPs or SQCs
+  MessageBuffer * requestFromTCP, network="From", virtual_network="1", ordered="true", vnet_type="request";
+  // To the Cores. TCC deals only with TCPs/SQCs. CP cores do not communicate directly with TCC.
+  MessageBuffer * responseToCore, network="To", virtual_network="3", ordered="true", vnet_type="response";
+  // From the NB
+  MessageBuffer * probeFromNB, network="From", virtual_network="0", ordered="false", vnet_type="request";
+  MessageBuffer * responseFromNB, network="From", virtual_network="2", ordered="false", vnet_type="response";
+  // To the NB
+  MessageBuffer * requestToNB, network="To", virtual_network="0", ordered="false", vnet_type="request";
+  MessageBuffer * responseToNB, network="To", virtual_network="2", ordered="false", vnet_type="response";
+  MessageBuffer * unblockToNB, network="To", virtual_network="4", ordered="false", vnet_type="unblock";
+
+  MessageBuffer * triggerQueue, ordered="true", random="false";
+{
+  // EVENTS
+  enumeration(Event, desc="TCC Events") {
+    // Requests coming from the Cores
+    RdBlk,                  desc="RdBlk event";
+    WrVicBlk,               desc="L1 Write Through";
+    WrVicBlkBack,           desc="L1 Write Back(dirty cache)";
+    Atomic,                 desc="Atomic Op";
+    AtomicDone,             desc="AtomicOps Complete";
+    AtomicNotDone,          desc="AtomicOps not Complete";
+    Data,                   desc="data messgae";
+    // Coming from this TCC
+    L2_Repl,                desc="L2 Replacement";
+    // Probes
+    PrbInv,                 desc="Invalidating probe";
+    // Coming from Memory Controller
+    WBAck,                  desc="writethrough ack from memory";
+  }
+
+  // STATES
+  state_declaration(State, desc="TCC State", default="TCC_State_I") {
+    M, AccessPermission:Read_Write, desc="Modified(dirty cache only)";
+    W, AccessPermission:Read_Write, desc="Written(dirty cache only)";
+    V, AccessPermission:Read_Only,  desc="Valid";
+    I, AccessPermission:Invalid,    desc="Invalid";
+    IV, AccessPermission:Busy,      desc="Waiting for Data";
+    WI, AccessPermission:Busy,      desc="Waiting on Writethrough Ack";
+    A, AccessPermission:Busy,       desc="Invalid waiting on atomic Data";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead,    desc="Read the data array";
+    DataArrayWrite,   desc="Write the data array";
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+  }
+
+
+  // STRUCTURES
+
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState,           desc="cache state";
+    bool Dirty,                 desc="Is the data dirty (diff from memory?)";
+    DataBlock DataBlk,          desc="Data for the block";
+    WriteMask writeMask,        desc="Dirty byte mask";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,     desc="Transient state";
+    DataBlock DataBlk,  desc="data for the block";
+    bool Dirty,         desc="Is the data dirty?";
+    bool Shared,        desc="Victim hit by shared probe";
+    MachineID From,     desc="Waiting for writeback from...";
+    NetDest Destination, desc="Data destination";
+    int numAtomics,     desc="number remaining atomics";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs";
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+
+
+  // FUNCTION DEFINITIONS
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  MachineID getPeer(MachineID mach) {
+    return createMachineID(MachineType:RegionBuffer, intToID(regionBufferNum));
+  }
+
+ Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", L2cache.lookup(addr));
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    return getCacheEntry(addr).DataBlk;
+  }
+
+  bool presentOrAvail(Addr addr) {
+    return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+        tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+        cache_entry.CacheState := state;
+    }
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes +
+        functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return TCC_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return TCC_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(TCC_State_to_permission(state));
+    }
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      L2cache.recordRequestType(CacheRequestType:DataArrayRead,addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      L2cache.recordRequestType(CacheRequestType:DataArrayWrite,addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      L2cache.recordRequestType(CacheRequestType:TagArrayRead,addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      L2cache.recordRequestType(CacheRequestType:TagArrayWrite,addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+
+  // ** OUT_PORTS **
+
+  // Three classes of ports
+  // Class 1: downward facing network links to NB
+  out_port(requestToNB_out, CPURequestMsg, requestToNB);
+  out_port(responseToNB_out, ResponseMsg, responseToNB);
+  out_port(unblockToNB_out, UnblockMsg, unblockToNB);
+
+  // Class 2: upward facing ports to GPU cores
+  out_port(responseToCore_out, ResponseMsg, responseToCore);
+
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+  //
+  // request queue going to NB
+  //
+
+
+// ** IN_PORTS **
+  in_port(triggerQueue_in, TiggerMsg, triggerQueue) {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (tbe.numAtomics == 0) {
+            trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe);
+        } else {
+            trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+
+
+  in_port(responseFromNB_in, ResponseMsg, responseFromNB) {
+    if (responseFromNB_in.isReady(clockEdge())) {
+      peek(responseFromNB_in, ResponseMsg, block_on="addr") {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+          if(presentOrAvail(in_msg.addr)) {
+            trigger(Event:Data, in_msg.addr, cache_entry, tbe);
+          } else {
+            Addr victim :=  L2cache.cacheProbe(in_msg.addr);
+            trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+          trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+
+  // Finally handling incoming requests (from TCP) and probes (from NB).
+
+  in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, NBProbeRequestMsg) {
+        DPRINTF(RubySlicc, "%s\n", in_msg);
+        DPRINTF(RubySlicc, "machineID: %s\n", machineID);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+      }
+    }
+  }
+
+
+  in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) {
+    if (coreRequestNetwork_in.isReady(clockEdge())) {
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+            if(WB) {
+                if(presentOrAvail(in_msg.addr)) {
+                    trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe);
+                } else {
+                    Addr victim :=  L2cache.cacheProbe(in_msg.addr);
+                    trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+                }
+            } else {
+                trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
+            }
+        } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+          trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
+          trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+        } else {
+          DPRINTF(RubySlicc, "%s\n", in_msg);
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+  // BEGIN ACTIONS
+
+  action(i_invL2, "i", desc="invalidate TCC cache block") {
+    if (is_valid(cache_entry)) {
+        L2cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  // Data available at TCC. Send the DATA to TCP
+  action(sd_sendData, "sd", desc="send Shared response") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := cache_entry.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.Dirty := false;
+        out_msg.State := CoherenceState:Shared;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+
+  // Data was not available at TCC. So, TCC forwarded the request to
+  // directory and directory responded back with data. Now, forward the
+  // DATA to TCP and send the unblock ack back to directory.
+  action(sdr_sendDataResponse, "sdr", desc="send Shared response") {
+    enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:TDSysResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination := tbe.Destination;
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := false;
+      out_msg.State := CoherenceState:Shared;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+    enqueue(unblockToNB_out, UnblockMsg, 1) {
+      out_msg.addr := address;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+
+  action(rd_requestData, "r", desc="Miss in L2, pass on") {
+    if(tbe.Destination.count()==1){
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+          out_msg.addr := address;
+          out_msg.Type := in_msg.Type;
+          out_msg.Requestor := machineID;
+          out_msg.Destination.add(getPeer(machineID));
+          out_msg.Shared := false; // unneeded for this request
+          out_msg.MessageSize := in_msg.MessageSize;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+      }
+    }
+  }
+
+  action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+    peek(responseFromNB_in, ResponseMsg) {
+        enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:TDSysWBAck;
+          out_msg.Destination.clear();
+          out_msg.Destination.add(in_msg.WTRequestor);
+          out_msg.Sender := machineID;
+          out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        }
+    }
+  }
+
+  action(swb_sendWBAck, "swb", desc="send WB Ack") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysWBAck;
+        out_msg.Destination.clear();
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+      }
+    }
+  }
+
+  action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
+    peek(responseFromNB_in, ResponseMsg) {
+        enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:TDSysResp;
+          out_msg.Destination.add(in_msg.WTRequestor);
+          out_msg.Sender := machineID;
+          out_msg.MessageSize := in_msg.MessageSize;
+          out_msg.DataBlk := in_msg.DataBlk;
+        }
+    }
+  }
+  action(sd2rb_sendDone2RegionBuffer, "sd2rb", desc="Request finished, send done ack") {
+    enqueue(unblockToNB_out, UnblockMsg, 1) {
+      out_msg.addr := address;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.DoneAck := true;
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      if (is_valid(tbe)) {
+          out_msg.Dirty := tbe.Dirty;
+      } else {
+          out_msg.Dirty := false;
+      }
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(a_allocateBlock, "a", desc="allocate TCC block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L2cache.allocate(address, new Entry));
+      cache_entry.writeMask.clear();
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    if (is_invalid(tbe)) {
+      check_allocate(TBEs);
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      tbe.Destination.clear();
+      tbe.numAtomics := 0;
+    }
+    if (coreRequestNetwork_in.isReady(clockEdge())) {
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
+          tbe.Destination.add(in_msg.Requestor);
+        }
+      }
+    }
+  }
+
+  action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
+    tbe.Destination.clear();
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(wcb_writeCacheBlock, "wcb", desc="write data to TCC") {
+    peek(responseFromNB_in, ResponseMsg) {
+      cache_entry.DataBlk := in_msg.DataBlk;
+      DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+    }
+  }
+
+  action(wdb_writeDirtyBytes, "wdb", desc="write data to TCC") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      cache_entry.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
+      cache_entry.writeMask.orMask(in_msg.writeMask);
+      DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+    }
+  }
+
+  action(wt_writeThrough, "wt", desc="write through data") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+        out_msg.addr := address;
+        out_msg.Requestor := machineID;
+        out_msg.WTRequestor := in_msg.Requestor;
+        out_msg.Destination.add(getPeer(machineID));
+        out_msg.MessageSize := MessageSizeType:Data;
+        out_msg.Type := CoherenceRequestType:WriteThrough;
+        out_msg.Dirty := true;
+        out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.writeMask.orMask(in_msg.writeMask);
+      }
+    }
+  }
+
+  action(wb_writeBack, "wb", desc="write back data") {
+    enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      out_msg.WTRequestor := machineID;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.MessageSize := MessageSizeType:Data;
+      out_msg.Type := CoherenceRequestType:WriteThrough;
+      out_msg.Dirty := true;
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.writeMask.orMask(cache_entry.writeMask);
+    }
+  }
+
+  action(at_atomicThrough, "at", desc="write back data") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+        out_msg.addr := address;
+        out_msg.Requestor := machineID;
+        out_msg.WTRequestor := in_msg.Requestor;
+        out_msg.Destination.add(getPeer(machineID));
+        out_msg.MessageSize := MessageSizeType:Data;
+        out_msg.Type := CoherenceRequestType:Atomic;
+        out_msg.Dirty := true;
+        out_msg.writeMask.orMask(in_msg.writeMask);
+      }
+    }
+  }
+
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(responseToNB_out, ResponseMsg, 1) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC, L3  respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+  action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+    L2cache.setMRU(address);
+  }
+
+  action(p_popRequestQueue, "p", desc="pop request queue") {
+    coreRequestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="pop response queue") {
+    responseFromNB_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+  action(zz_recycleRequestQueue, "z", desc="stall"){
+    coreRequestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+
+  action(ina_incrementNumAtomics, "ina", desc="inc num atomics") {
+    tbe.numAtomics := tbe.numAtomics + 1;
+  }
+
+
+  action(dna_decrementNumAtomics, "dna", desc="dec num atomics") {
+    tbe.numAtomics := tbe.numAtomics - 1;
+    if (tbe.numAtomics==0) {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:AtomicDone;
+      }
+    }
+  }
+
+  action(ptr_popTriggerQueue, "ptr", desc="pop Trigger") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  // END ACTIONS
+
+  // BEGIN TRANSITIONS
+  // transitions from base
+  // Assumptions for ArrayRead/Write
+  // TBE checked before tags
+  // Data Read/Write requires Tag Read
+
+  transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) {TagArrayRead} {
+    zz_recycleRequestQueue;
+  }
+  transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) {TagArrayRead} {
+    zz_recycleRequestQueue;
+  }
+  transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) {TagArrayRead} {
+    zz_recycleRequestQueue;
+  }
+  transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} {
+    sd_sendData;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+  transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} {
+    t_allocateTBE;
+    wb_writeBack;
+  }
+
+  transition(I, RdBlk, IV) {TagArrayRead} {
+    t_allocateTBE;
+    rd_requestData;
+    p_popRequestQueue;
+  }
+
+  transition(IV, RdBlk) {
+    t_allocateTBE;
+    rd_requestData;
+    p_popRequestQueue;
+  }
+
+  transition({V, I},Atomic, A) {TagArrayRead} {
+    i_invL2;
+    t_allocateTBE;
+    at_atomicThrough;
+    ina_incrementNumAtomics;
+    p_popRequestQueue;
+  }
+
+  transition(A, Atomic) {
+    at_atomicThrough;
+    ina_incrementNumAtomics;
+    p_popRequestQueue;
+  }
+
+  transition({M, W}, Atomic, WI) {TagArrayRead} {
+    t_allocateTBE;
+    wb_writeBack;
+  }
+
+  // Cahceblock stays in I state which implies
+  // this TCC is a write-no-allocate cache
+  transition(I, WrVicBlk) {TagArrayRead} {
+    wt_writeThrough;
+    p_popRequestQueue;
+  }
+
+  transition(V, WrVicBlk) {TagArrayRead, DataArrayWrite} {
+    ut_updateTag;
+    wdb_writeDirtyBytes;
+    wt_writeThrough;
+    p_popRequestQueue;
+  }
+
+  transition({V, M}, WrVicBlkBack, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    ut_updateTag;
+    swb_sendWBAck;
+    wdb_writeDirtyBytes;
+    p_popRequestQueue;
+  }
+
+  transition(W, WrVicBlkBack) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    ut_updateTag;
+    swb_sendWBAck;
+    wdb_writeDirtyBytes;
+    p_popRequestQueue;
+  }
+
+  transition(I, WrVicBlkBack, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocateBlock;
+    ut_updateTag;
+    swb_sendWBAck;
+    wdb_writeDirtyBytes;
+    p_popRequestQueue;
+  }
+
+  transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} {
+    t_allocateTBE;
+    wb_writeBack;
+    i_invL2;
+  }
+
+  transition({I, V}, L2_Repl, I) {TagArrayRead, TagArrayWrite} {
+    i_invL2;
+  }
+
+  transition({A, IV, WI}, L2_Repl) {
+    i_invL2;
+  }
+
+  transition({I, V}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(M, PrbInv, W) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(W, PrbInv) {TagArrayRead} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition({A, IV, WI}, PrbInv) {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocateBlock;
+    ut_updateTag;
+    wcb_writeCacheBlock;
+    sdr_sendDataResponse;
+    sd2rb_sendDone2RegionBuffer;
+    pr_popResponseQueue;
+    dt_deallocateTBE;
+  }
+
+  transition(A, Data) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    a_allocateBlock;
+    ar_sendAtomicResponse;
+    sd2rb_sendDone2RegionBuffer;
+    dna_decrementNumAtomics;
+    pr_popResponseQueue;
+  }
+
+  transition(A, AtomicDone, I) {TagArrayRead, TagArrayWrite} {
+    dt_deallocateTBE;
+    ptr_popTriggerQueue;
+  }
+
+  transition(A, AtomicNotDone) {TagArrayRead} {
+    ptr_popTriggerQueue;
+  }
+
+  //M,W should not see WBAck as the cache is in WB mode
+  //WBAcks do not need to check tags
+  transition({I, V, IV, A}, WBAck) {
+    w_sendResponseWBAck;
+    sd2rb_sendDone2RegionBuffer;
+    pr_popResponseQueue;
+  }
+
+  transition(WI, WBAck,I) {
+    sd2rb_sendDone2RegionBuffer;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+}
diff --git a/src/mem/protocol/GPU_VIPER_Region.slicc b/src/mem/protocol/GPU_VIPER_Region.slicc
new file mode 100644
index 000000000..cbfef9de3
--- /dev/null
+++ b/src/mem/protocol/GPU_VIPER_Region.slicc
@@ -0,0 +1,11 @@
+protocol "GPU_VIPER_Region";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-Region-CorePair.sm";
+include "MOESI_AMD_Base-L3cache.sm";
+include "MOESI_AMD_Base-Region-dir.sm";
+include "GPU_VIPER_Region-TCC.sm";
+include "GPU_VIPER-TCP.sm";
+include "GPU_VIPER-SQC.sm";
+include "MOESI_AMD_Base-RegionDir.sm";
+include "MOESI_AMD_Base-RegionBuffer.sm";
diff --git a/src/mem/protocol/MOESI_AMD_Base-CorePair.sm b/src/mem/protocol/MOESI_AMD_Base-CorePair.sm
new file mode 100644
index 000000000..76fe77230
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-CorePair.sm
@@ -0,0 +1,2904 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:CorePair, "CP-like Core Coherence")
+ : Sequencer * sequencer;
+   Sequencer * sequencer1;
+   CacheMemory * L1Icache;
+   CacheMemory * L1D0cache;
+   CacheMemory * L1D1cache;
+   CacheMemory * L2cache;     // func mem logic looks in this CacheMemory
+   bool send_evictions := "False";
+   Cycles issue_latency := 5;  // time to send data down to NB
+   Cycles l2_hit_latency := 18;
+
+  // BEGIN Core Buffers
+
+  // To the Network
+  MessageBuffer * requestFromCore, network="To", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseFromCore, network="To", virtual_network="2", vnet_type="response";
+  MessageBuffer * unblockFromCore, network="To", virtual_network="4", vnet_type="unblock";
+
+  // From the Network
+  MessageBuffer * probeToCore, network="From", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseToCore, network="From", virtual_network="2", vnet_type="response";
+
+  MessageBuffer * mandatoryQueue;
+
+  MessageBuffer * triggerQueue, ordered="true";
+
+  // END Core Buffers
+
+{
+  // BEGIN STATES
+  state_declaration(State, desc="Cache states", default="CorePair_State_I") {
+
+    // Base States
+    I, AccessPermission:Invalid, desc="Invalid";
+    S, AccessPermission:Read_Only, desc="Shared";
+    E0, AccessPermission:Read_Write, desc="Exclusive with Cluster 0 ownership";
+    E1, AccessPermission:Read_Write, desc="Exclusive with Cluster 1 ownership";
+    Es, AccessPermission:Read_Write, desc="Exclusive in core";
+    O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
+    Ms, AccessPermission:Read_Write, desc="Modified in core, both clusters may be sharing line";
+    M0, AccessPermission:Read_Write, desc="Modified with cluster ownership";
+    M1, AccessPermission:Read_Write, desc="Modified with cluster ownership";
+
+    // Transient States
+    I_M0, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+    I_M1, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+    I_M0M1, AccessPermission:Busy, desc="Was in I_M0, got a store request from other cluster as well";
+    I_M1M0, AccessPermission:Busy, desc="Was in I_M1, got a store request from other cluster as well";
+    I_M0Ms, AccessPermission:Busy, desc="Was in I_M0, got a load request from other cluster as well";
+    I_M1Ms, AccessPermission:Busy, desc="Was in I_M1, got a load request from other cluster as well";
+    I_E0S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+    I_E1S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+    I_ES, AccessPermission:Busy, desc="S_F got hit by invalidating probe, RdBlk response needs to go to both clusters";
+
+    IF_E0S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E0S but expecting a L2_to_L1D0 trigger, just drop when receive";
+    IF_E1S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E1S but expecting a L2_to_L1D1 trigger, just drop when receive";
+    IF_ES, AccessPermission:Busy, desc="same, but waiting for two fills";
+    IF0_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one";
+    IF1_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one";
+    F_S0, AccessPermission:Busy, desc="same, but going to S0 when trigger received";
+    F_S1, AccessPermission:Busy, desc="same, but going to S1 when trigger received";
+
+    ES_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for clean writeback ack";
+    MO_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for dirty writeback ack";
+    MO_S0, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS";
+    MO_S1, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS";
+    S_F0, AccessPermission:Read_Only,  desc="Shared, filling L1";
+    S_F1, AccessPermission:Read_Only,  desc="Shared, filling L1";
+    S_F, AccessPermission:Read_Only,   desc="Shared, filling L1";
+    O_F0, AccessPermission:Read_Only,  desc="Owned, filling L1";
+    O_F1, AccessPermission:Read_Only,  desc="Owned, filling L1";
+    O_F,  AccessPermission:Read_Only,  desc="Owned, filling L1";
+    Si_F0, AccessPermission:Read_Only, desc="Shared, filling icache";
+    Si_F1, AccessPermission:Read_Only, desc="Shared, filling icache";
+    S_M0, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    S_M1, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    O_M0, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    O_M1, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    S0, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 0, waiting for response";
+    S1, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 1, waiting for response";
+
+    Es_F0, AccessPermission:Read_Write, desc="Es, Cluster read, filling";
+    Es_F1, AccessPermission:Read_Write, desc="Es, Cluster read, filling";
+    Es_F, AccessPermission:Read_Write,  desc="Es, other cluster read, filling";
+    E0_F, AccessPermission:Read_Write, desc="E0, cluster read, filling";
+    E1_F, AccessPermission:Read_Write, desc="...";
+    E0_Es, AccessPermission:Read_Write, desc="...";
+    E1_Es, AccessPermission:Read_Write, desc="...";
+    Ms_F0, AccessPermission:Read_Write, desc="...";
+    Ms_F1, AccessPermission:Read_Write, desc="...";
+    Ms_F, AccessPermission:Read_Write,  desc="...";
+    M0_F, AccessPermission:Read_Write, desc="...";
+    M0_Ms, AccessPermission:Read_Write, desc="...";
+    M1_F, AccessPermission:Read_Write, desc="...";
+    M1_Ms, AccessPermission:Read_Write, desc="...";
+
+    I_C, AccessPermission:Invalid, desc="Invalid, but waiting for WBAck from NB from canceled writeback";
+    S0_C, AccessPermission:Busy, desc="MO_S0 hit by invalidating probe, waiting for WBAck form NB for canceled WB";
+    S1_C, AccessPermission:Busy, desc="MO_S1 hit by invalidating probe, waiting for WBAck form NB for canceled WB";
+    S_C, AccessPermission:Busy, desc="S*_C got NB_AckS, still waiting for WBAck";
+
+  } // END STATES
+
+  // BEGIN EVENTS
+  enumeration(Event, desc="CP Events") {
+    // CP Initiated events
+    C0_Load_L1miss,            desc="Cluster 0 load, L1 missed";
+    C0_Load_L1hit,             desc="Cluster 0 load, L1 hit";
+    C1_Load_L1miss,            desc="Cluster 1 load L1 missed";
+    C1_Load_L1hit,             desc="Cluster 1 load L1 hit";
+    Ifetch0_L1hit,             desc="Instruction fetch, hit in the L1";
+    Ifetch1_L1hit,             desc="Instruction fetch, hit in the L1";
+    Ifetch0_L1miss,            desc="Instruction fetch, missed in the L1";
+    Ifetch1_L1miss,            desc="Instruction fetch, missed in the L1";
+    C0_Store_L1miss,           desc="Cluster 0 store missed in L1";
+    C0_Store_L1hit,            desc="Cluster 0 store hit in L1";
+    C1_Store_L1miss,           desc="Cluster 1 store missed in L1";
+    C1_Store_L1hit,            desc="Cluster 1 store hit in L1";
+    // NB Initiated events
+    NB_AckS,             desc="NB Ack to Core Request";
+    NB_AckM,             desc="NB Ack to Core Request";
+    NB_AckE,             desc="NB Ack to Core Request";
+
+    NB_AckWB,            desc="NB Ack for writeback";
+
+    // Memory System initiatied events
+    L1I_Repl,           desc="Replace address from L1I"; // Presumed clean
+    L1D0_Repl,           desc="Replace address from L1D0"; // Presumed clean
+    L1D1_Repl,           desc="Replace address from L1D1"; // Presumed clean
+    L2_Repl,            desc="Replace address from L2";
+
+    L2_to_L1D0,           desc="L1 fill from L2";
+    L2_to_L1D1,           desc="L1 fill from L2";
+    L2_to_L1I,           desc="L1 fill from L2";
+
+    // Probe Events
+    PrbInvData,         desc="probe, return O or M data";
+    PrbInv,             desc="probe, no need for data";
+    PrbShrData,         desc="probe downgrade, return O or M data";
+
+  }  // END EVENTS
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    L1D0DataArrayRead,    desc="Read the data array";
+    L1D0DataArrayWrite,   desc="Write the data array";
+    L1D0TagArrayRead,     desc="Read the data array";
+    L1D0TagArrayWrite,    desc="Write the data array";
+    L1D1DataArrayRead,    desc="Read the data array";
+    L1D1DataArrayWrite,   desc="Write the data array";
+    L1D1TagArrayRead,     desc="Read the data array";
+    L1D1TagArrayWrite,    desc="Write the data array";
+    L1IDataArrayRead,     desc="Read the data array";
+    L1IDataArrayWrite,    desc="Write the data array";
+    L1ITagArrayRead,      desc="Read the data array";
+    L1ITagArrayWrite,     desc="Write the data array";
+    L2DataArrayRead,      desc="Read the data array";
+    L2DataArrayWrite,     desc="Write the data array";
+    L2TagArrayRead,       desc="Read the data array";
+    L2TagArrayWrite,      desc="Write the data array";
+  }
+
+
+  // BEGIN STRUCTURE DEFINITIONS
+
+
+  // Cache Entry
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState,           desc="cache state";
+    bool Dirty,                 desc="Is the data dirty (diff than memory)?";
+    DataBlock DataBlk,          desc="data for the block";
+    bool FromL2, default="false", desc="block just moved from L2";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,             desc="Transient state";
+    DataBlock DataBlk,       desc="data for the block, required for concurrent writebacks";
+    bool Dirty,              desc="Is the data dirty (different than memory)?";
+    int NumPendingMsgs,      desc="Number of acks/data messages that this processor is waiting for";
+    bool Shared,             desc="Victim hit by shared probe";
+   }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<CorePair_TBE>", constructor="m_number_of_TBEs";
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  // END STRUCTURE DEFINITIONS
+
+  // BEGIN INTERNAL FUNCTIONS
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  bool addressInCore(Addr addr) {
+    return (L2cache.isTagPresent(addr) || L1Icache.isTagPresent(addr) || L1D0cache.isTagPresent(addr) || L1D1cache.isTagPresent(addr));
+  }
+
+  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+    Entry L2cache_entry := static_cast(Entry, "pointer", L2cache.lookup(address));
+    return L2cache_entry;
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return tbe.DataBlk;
+    } else {
+      return getCacheEntry(addr).DataBlk;
+    }
+  }
+
+  Entry getL1CacheEntry(Addr addr, int cluster), return_by_pointer="yes" {
+    if (cluster == 0) {
+      Entry L1D0_entry := static_cast(Entry, "pointer", L1D0cache.lookup(addr));
+      return L1D0_entry;
+    } else {
+      Entry L1D1_entry := static_cast(Entry, "pointer", L1D1cache.lookup(addr));
+      return L1D1_entry;
+    }
+  }
+
+  Entry getICacheEntry(Addr addr), return_by_pointer="yes" {
+    Entry c_entry := static_cast(Entry, "pointer", L1Icache.lookup(addr));
+    return c_entry;
+  }
+
+  bool presentOrAvail2(Addr addr) {
+    return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+  }
+
+  bool presentOrAvailI(Addr addr) {
+    return L1Icache.isTagPresent(addr) || L1Icache.cacheAvail(addr);
+  }
+
+  bool presentOrAvailD0(Addr addr) {
+    return L1D0cache.isTagPresent(addr) || L1D0cache.cacheAvail(addr);
+  }
+
+  bool presentOrAvailD1(Addr addr) {
+    return L1D1cache.isTagPresent(addr) || L1D1cache.cacheAvail(addr);
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if(is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+      cache_entry.CacheState := state;
+    }
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return CorePair_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return CorePair_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(CorePair_State_to_permission(state));
+    }
+  }
+
+  MachineType testAndClearLocalHit(Entry cache_entry) {
+    assert(is_valid(cache_entry));
+    if (cache_entry.FromL2) {
+      cache_entry.FromL2 := false;
+      return MachineType:L2Cache;
+    } else {
+      return MachineType:L1Cache;
+    }
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L1D0DataArrayRead) {
+        L1D0cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L1D0DataArrayWrite) {
+        L1D0cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L1D0TagArrayRead) {
+        L1D0cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L1D0TagArrayWrite) {
+        L1D0cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:L1D1DataArrayRead) {
+        L1D1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L1D1DataArrayWrite) {
+        L1D1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L1D1TagArrayRead) {
+        L1D1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L1D1TagArrayWrite) {
+        L1D1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:L1IDataArrayRead) {
+        L1Icache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L1IDataArrayWrite) {
+        L1Icache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L1ITagArrayRead) {
+        L1Icache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L1ITagArrayWrite) {
+        L1Icache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:L2DataArrayRead) {
+        L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L2DataArrayWrite) {
+        L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L2TagArrayRead) {
+        L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L2TagArrayWrite) {
+        L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L2DataArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L2DataArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L2TagArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L2TagArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if  (request_type == RequestType:L1D0DataArrayRead) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D0DataArrayWrite) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D0TagArrayRead) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1D0TagArrayWrite) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1D1DataArrayRead) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D1DataArrayWrite) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D1TagArrayRead) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1D1TagArrayWrite) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1IDataArrayRead) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1IDataArrayWrite) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1ITagArrayRead) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1ITagArrayWrite) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+
+    } else {
+      return true;
+    }
+  }
+
+  // END INTERNAL FUNCTIONS
+
+  // ** OUT_PORTS **
+
+  out_port(requestNetwork_out, CPURequestMsg, requestFromCore);
+  out_port(responseNetwork_out, ResponseMsg, responseFromCore);
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+  out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
+
+  // ** IN_PORTS **
+
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue, block_on="addr") {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == TriggerType:L2_to_L1) {
+          if (in_msg.Dest == CacheId:L1I) {
+            trigger(Event:L2_to_L1I, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Dest == CacheId:L1D0) {
+            trigger(Event:L2_to_L1D0, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Dest == CacheId:L1D1) {
+            trigger(Event:L2_to_L1D1, in_msg.addr, cache_entry, tbe);
+          } else {
+            error("unexpected trigger dest");
+          }
+        }
+      }
+    }
+  }
+
+
+  in_port(probeNetwork_in, NBProbeRequestMsg, probeToCore) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, NBProbeRequestMsg, block_on="addr") {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == ProbeRequestType:PrbInv) {
+          if (in_msg.ReturnData) {
+            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+          assert(in_msg.ReturnData);
+          trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+
+  // ResponseNetwork
+  in_port(responseToCore_in, ResponseMsg, responseToCore) {
+    if (responseToCore_in.isReady(clockEdge())) {
+      peek(responseToCore_in, ResponseMsg, block_on="addr") {
+
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+          if (in_msg.State == CoherenceState:Modified) {
+              trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.State == CoherenceState:Shared) {
+            trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.State == CoherenceState:Exclusive) {
+            trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+          trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+
+  // Nothing from the Unblock Network
+
+  // Mandatory Queue
+  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+    if (mandatoryQueue_in.isReady(clockEdge())) {
+      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+
+        Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+        TBE tbe := TBEs.lookup(in_msg.LineAddress);
+
+        if (in_msg.Type == RubyRequestType:IFETCH) {
+          // FETCH ACCESS
+
+          if (L1Icache.isTagPresent(in_msg.LineAddress)) {
+            if (mod(in_msg.contextId, 2) == 0) {
+              trigger(Event:Ifetch0_L1hit, in_msg.LineAddress, cache_entry, tbe);
+            } else {
+              trigger(Event:Ifetch1_L1hit, in_msg.LineAddress, cache_entry, tbe);
+            }
+          } else {
+            if (presentOrAvail2(in_msg.LineAddress)) {
+              if (presentOrAvailI(in_msg.LineAddress)) {
+                if (mod(in_msg.contextId, 2) == 0) {
+                  trigger(Event:Ifetch0_L1miss, in_msg.LineAddress, cache_entry,
+                          tbe);
+                } else {
+                  trigger(Event:Ifetch1_L1miss, in_msg.LineAddress, cache_entry,
+                          tbe);
+                }
+              } else {
+                Addr victim := L1Icache.cacheProbe(in_msg.LineAddress);
+                trigger(Event:L1I_Repl, victim,
+                        getCacheEntry(victim), TBEs.lookup(victim));
+              }
+            } else { // Not present or avail in L2
+              Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+              trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+                      TBEs.lookup(victim));
+            }
+          }
+        } else {
+          // DATA ACCESS
+          if (mod(in_msg.contextId, 2) == 1) {
+            if (L1D1cache.isTagPresent(in_msg.LineAddress)) {
+              if (in_msg.Type == RubyRequestType:LD) {
+                trigger(Event:C1_Load_L1hit, in_msg.LineAddress, cache_entry,
+                        tbe);
+              } else {
+                // Stores must write through, make sure L2 avail.
+                if (presentOrAvail2(in_msg.LineAddress)) {
+                  trigger(Event:C1_Store_L1hit, in_msg.LineAddress, cache_entry,
+                          tbe);
+                } else {
+                  Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+                  trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+                          TBEs.lookup(victim));
+                }
+              }
+            } else {
+              if (presentOrAvail2(in_msg.LineAddress)) {
+                if (presentOrAvailD1(in_msg.LineAddress)) {
+                  if (in_msg.Type == RubyRequestType:LD) {
+                    trigger(Event:C1_Load_L1miss, in_msg.LineAddress,
+                            cache_entry, tbe);
+                  } else {
+                    trigger(Event:C1_Store_L1miss, in_msg.LineAddress,
+                            cache_entry, tbe);
+                  }
+                } else {
+                  Addr victim := L1D1cache.cacheProbe(in_msg.LineAddress);
+                  trigger(Event:L1D1_Repl, victim,
+                          getCacheEntry(victim), TBEs.lookup(victim));
+                }
+              } else { // not present or avail in L2
+                Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+                trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+              }
+            }
+          } else {
+            Entry L1D0cache_entry := getL1CacheEntry(in_msg.LineAddress, 0);
+            if (is_valid(L1D0cache_entry)) {
+              if (in_msg.Type == RubyRequestType:LD) {
+                trigger(Event:C0_Load_L1hit, in_msg.LineAddress, cache_entry,
+                    tbe);
+              } else {
+                if (presentOrAvail2(in_msg.LineAddress)) {
+                  trigger(Event:C0_Store_L1hit, in_msg.LineAddress, cache_entry,
+                      tbe);
+                } else {
+                  Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+                  trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+                      TBEs.lookup(victim));
+                }
+              }
+            } else {
+              if (presentOrAvail2(in_msg.LineAddress)) {
+                if (presentOrAvailD0(in_msg.LineAddress)) {
+                  if (in_msg.Type == RubyRequestType:LD) {
+                    trigger(Event:C0_Load_L1miss, in_msg.LineAddress,
+                        cache_entry, tbe);
+                  } else {
+                    trigger(Event:C0_Store_L1miss, in_msg.LineAddress,
+                            cache_entry, tbe);
+                  }
+                } else {
+                  Addr victim := L1D0cache.cacheProbe(in_msg.LineAddress);
+                  trigger(Event:L1D0_Repl, victim, getCacheEntry(victim),
+                          TBEs.lookup(victim));
+                }
+              } else {
+                Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+                trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+                        TBEs.lookup(victim));
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+
+  // ACTIONS
+  action(ii_invIcache, "ii", desc="invalidate iCache") {
+    if (L1Icache.isTagPresent(address)) {
+      L1Icache.deallocate(address);
+    }
+  }
+
+  action(i0_invCluster, "i0", desc="invalidate cluster 0") {
+    if (L1D0cache.isTagPresent(address)) {
+      L1D0cache.deallocate(address);
+    }
+  }
+
+  action(i1_invCluster, "i1", desc="invalidate cluster 1") {
+    if (L1D1cache.isTagPresent(address)) {
+      L1D1cache.deallocate(address);
+    }
+  }
+
+  action(ib_invBothClusters, "ib", desc="invalidate both clusters") {
+    if (L1D0cache.isTagPresent(address)) {
+      L1D0cache.deallocate(address);
+    }
+    if (L1D1cache.isTagPresent(address)) {
+      L1D1cache.deallocate(address);
+    }
+  }
+
+  action(i2_invL2, "i2", desc="invalidate L2") {
+    if(is_valid(cache_entry)) {
+        L2cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  action(mru_setMRU, "mru", desc="Update LRU state") {
+    L2cache.setMRU(address);
+  }
+
+  action(mruD1_setD1cacheMRU, "mruD1", desc="Update LRU state") {
+    L1D1cache.setMRU(address);
+  }
+
+  action(mruD0_setD0cacheMRU, "mruD0", desc="Update LRU state") {
+    L1D0cache.setMRU(address);
+  }
+
+  action(mruI_setIcacheMRU, "mruI", desc="Update LRU state") {
+    L1Icache.setMRU(address);
+  }
+
+  action(n_issueRdBlk, "n", desc="Issue RdBlk") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlk;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      DPRINTF(RubySlicc,"%s\n",out_msg.Destination);
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkM;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkS;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(vd_victim, "vd", desc="Victimize M/O L2 Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      assert(is_valid(cache_entry));
+      out_msg.DataBlk := cache_entry.DataBlk;
+      assert(cache_entry.Dirty);
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicDirty;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:O) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+    }
+  }
+
+  action(vc_victim, "vc", desc="Victimize E/S L2 Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicClean;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:S) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+    }
+  }
+
+  action(a0_allocateL1D, "a0", desc="Allocate L1D0 Block") {
+    if (L1D0cache.isTagPresent(address) == false) {
+      L1D0cache.allocateVoid(address, new Entry);
+    }
+  }
+
+  action(a1_allocateL1D, "a1", desc="Allocate L1D1 Block") {
+    if (L1D1cache.isTagPresent(address) == false) {
+      L1D1cache.allocateVoid(address, new Entry);
+    }
+  }
+
+  action(ai_allocateL1I, "ai", desc="Allocate L1I Block") {
+    if (L1Icache.isTagPresent(address) == false) {
+      L1Icache.allocateVoid(address, new Entry);
+    }
+  }
+
+  action(a2_allocateL2, "a2", desc="Allocate L2 Block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L2cache.allocate(address, new Entry));
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    assert(is_valid(cache_entry));
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+    tbe.DataBlk := cache_entry.DataBlk;  // Data only used for WBs
+    tbe.Dirty := cache_entry.Dirty;
+    tbe.Shared := false;
+  }
+
+  action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+    mandatoryQueue_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+    responseToCore_in.dequeue(clockEdge());
+  }
+
+  action(pt_popTriggerQueue, "pt", desc="Pop Trigger Queue") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  action(il0_loadDone, "il0", desc="Cluster 0 i load done") {
+    Entry entry := getICacheEntry(address);
+    Entry l2entry := getCacheEntry(address); // Used for functional accesses
+    assert(is_valid(entry));
+    // L2 supplies data (functional accesses only look in L2, ok because L1
+    //                   writes through to L2)
+    sequencer.readCallback(address,
+                           l2entry.DataBlk,
+                           true,
+                           testAndClearLocalHit(entry));
+  }
+
+  action(il1_loadDone, "il1", desc="Cluster 1 i load done") {
+    Entry entry := getICacheEntry(address);
+    Entry l2entry := getCacheEntry(address); // Used for functional accesses
+    assert(is_valid(entry));
+    // L2 supplies data (functional accesses only look in L2, ok because L1
+    //                   writes through to L2)
+    sequencer1.readCallback(address,
+                            l2entry.DataBlk,
+                            true,
+                            testAndClearLocalHit(entry));
+  }
+
+  action(l0_loadDone, "l0", desc="Cluster 0 load done") {
+    Entry entry := getL1CacheEntry(address, 0);
+    Entry l2entry := getCacheEntry(address); // Used for functional accesses
+    assert(is_valid(entry));
+    // L2 supplies data (functional accesses only look in L2, ok because L1
+    //                   writes through to L2)
+    sequencer.readCallback(address,
+                           l2entry.DataBlk,
+                           true,
+                           testAndClearLocalHit(entry));
+  }
+
+  action(l1_loadDone, "l1", desc="Cluster 1 load done") {
+    Entry entry := getL1CacheEntry(address, 1);
+    Entry l2entry := getCacheEntry(address); // Used for functional accesses
+    assert(is_valid(entry));
+    // L2 supplies data (functional accesses only look in L2, ok because L1
+    //                   writes through to L2)
+    sequencer1.readCallback(address,
+                            l2entry.DataBlk,
+                            true,
+                            testAndClearLocalHit(entry));
+  }
+
+  action(xl0_loadDone, "xl0", desc="Cluster 0 load done") {
+    peek(responseToCore_in, ResponseMsg) {
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+              (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      Entry l2entry := getCacheEntry(address); // Used for functional accesses
+      DPRINTF(ProtocolTrace, "CP Load Done 0 -- address %s, data: %s\n", address, l2entry.DataBlk);
+      // L2 supplies data (functional accesses only look in L2, ok because L1
+      //                   writes through to L2)
+      sequencer.readCallback(address,
+                             l2entry.DataBlk,
+                             false,
+                             machineIDToMachineType(in_msg.Sender),
+                             in_msg.InitialRequestTime,
+                             in_msg.ForwardRequestTime,
+                             in_msg.ProbeRequestStartTime);
+    }
+  }
+
+  action(xl1_loadDone, "xl1", desc="Cluster 1 load done") {
+   peek(responseToCore_in, ResponseMsg) {
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+              (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      Entry l2entry := getCacheEntry(address); // Used for functional accesses
+      // L2 supplies data (functional accesses only look in L2, ok because L1
+      //                   writes through to L2)
+      sequencer1.readCallback(address,
+                              l2entry.DataBlk,
+                              false,
+                              machineIDToMachineType(in_msg.Sender),
+                              in_msg.InitialRequestTime,
+                              in_msg.ForwardRequestTime,
+                              in_msg.ProbeRequestStartTime);
+    }
+  }
+
+  action(xi0_loadDone, "xi0", desc="Cluster 0 i-load done") {
+    peek(responseToCore_in, ResponseMsg) {
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+              (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      Entry l2entry := getCacheEntry(address); // Used for functional accesses
+      // L2 supplies data (functional accesses only look in L2, ok because L1
+      //                   writes through to L2)
+      sequencer.readCallback(address,
+                             l2entry.DataBlk,
+                             false,
+                             machineIDToMachineType(in_msg.Sender),
+                             in_msg.InitialRequestTime,
+                             in_msg.ForwardRequestTime,
+                             in_msg.ProbeRequestStartTime);
+    }
+  }
+
+  action(xi1_loadDone, "xi1", desc="Cluster 1 i-load done") {
+    peek(responseToCore_in, ResponseMsg) {
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+              (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      Entry l2entry := getCacheEntry(address); // Used for functional accesses
+      // L2 supplies data (functional accesses only look in L2, ok because L1
+      //                   writes through to L2)
+      sequencer1.readCallback(address,
+                              l2entry.DataBlk,
+                              false,
+                              machineIDToMachineType(in_msg.Sender),
+                              in_msg.InitialRequestTime,
+                              in_msg.ForwardRequestTime,
+                              in_msg.ProbeRequestStartTime);
+    }
+  }
+
+  action(s0_storeDone, "s0", desc="Cluster 0 store done") {
+    Entry entry := getL1CacheEntry(address, 0);
+    assert(is_valid(entry));
+    assert(is_valid(cache_entry));
+    sequencer.writeCallback(address,
+                            cache_entry.DataBlk,
+                            true,
+                            testAndClearLocalHit(entry));
+    cache_entry.Dirty := true;
+    entry.DataBlk := cache_entry.DataBlk;
+    entry.Dirty := true;
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+  }
+
+  action(s1_storeDone, "s1", desc="Cluster 1 store done") {
+    Entry entry := getL1CacheEntry(address, 1);
+    assert(is_valid(entry));
+    assert(is_valid(cache_entry));
+    sequencer1.writeCallback(address,
+                             cache_entry.DataBlk,
+                             true,
+                             testAndClearLocalHit(entry));
+    cache_entry.Dirty := true;
+    entry.Dirty := true;
+    entry.DataBlk := cache_entry.DataBlk;
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+  }
+
+  action(xs0_storeDone, "xs0", desc="Cluster 0 store done") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getL1CacheEntry(address, 0);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+             (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      sequencer.writeCallback(address,
+                              cache_entry.DataBlk,
+                              false,
+                              machineIDToMachineType(in_msg.Sender),
+                              in_msg.InitialRequestTime,
+                              in_msg.ForwardRequestTime,
+                              in_msg.ProbeRequestStartTime);
+      cache_entry.Dirty := true;
+      entry.Dirty := true;
+      entry.DataBlk := cache_entry.DataBlk;
+      DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    }
+  }
+
+  action(xs1_storeDone, "xs1", desc="Cluster 1 store done") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getL1CacheEntry(address, 1);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+             (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      sequencer1.writeCallback(address,
+                               cache_entry.DataBlk,
+                               false,
+                               machineIDToMachineType(in_msg.Sender),
+                               in_msg.InitialRequestTime,
+                               in_msg.ForwardRequestTime,
+                               in_msg.ProbeRequestStartTime);
+      cache_entry.Dirty := true;
+      entry.Dirty := true;
+      entry.DataBlk := cache_entry.DataBlk;
+      DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    }
+  }
+
+  action(forward_eviction_to_cpu0, "fec0", desc="sends eviction information to processor0") {
+    if (send_evictions) {
+      DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
+      sequencer.evictionCallback(address);
+    }
+  }
+
+  action(forward_eviction_to_cpu1, "fec1", desc="sends eviction information to processor1") {
+    if (send_evictions) {
+      DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
+      sequencer1.evictionCallback(address);
+    }
+  }
+
+  action(ci_copyL2ToL1, "ci", desc="copy L2 data to L1") {
+    Entry entry := getICacheEntry(address);
+    assert(is_valid(entry));
+    assert(is_valid(cache_entry));
+    entry.Dirty := cache_entry.Dirty;
+    entry.DataBlk := cache_entry.DataBlk;
+    entry.FromL2 := true;
+  }
+
+  action(c0_copyL2ToL1, "c0", desc="copy L2 data to L1") {
+    Entry entry := getL1CacheEntry(address, 0);
+    assert(is_valid(entry));
+    assert(is_valid(cache_entry));
+    entry.Dirty := cache_entry.Dirty;
+    entry.DataBlk := cache_entry.DataBlk;
+    entry.FromL2 := true;
+  }
+
+  action(c1_copyL2ToL1, "c1", desc="copy L2 data to L1") {
+    Entry entry := getL1CacheEntry(address, 1);
+    assert(is_valid(entry));
+    assert(is_valid(cache_entry));
+    entry.Dirty := cache_entry.Dirty;
+    entry.DataBlk := cache_entry.DataBlk;
+    entry.FromL2 := true;
+  }
+
+  action(fi_L2ToL1, "fi", desc="L2 to L1 inst fill") {
+    enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+      out_msg.addr := address;
+      out_msg.Type := TriggerType:L2_to_L1;
+      out_msg.Dest := CacheId:L1I;
+    }
+  }
+
+  action(f0_L2ToL1, "f0", desc="L2 to L1 data fill") {
+    enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+      out_msg.addr := address;
+      out_msg.Type := TriggerType:L2_to_L1;
+      out_msg.Dest := CacheId:L1D0;
+    }
+  }
+
+  action(f1_L2ToL1, "f1", desc="L2 to L1 data fill") {
+    enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+      out_msg.addr := address;
+      out_msg.Type := TriggerType:L2_to_L1;
+      out_msg.Dest := CacheId:L1D1;
+    }
+  }
+
+  action(wi_writeIcache, "wi", desc="write data to icache (and l2)") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getICacheEntry(address);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      entry.DataBlk := in_msg.DataBlk;
+      entry.Dirty := in_msg.Dirty;
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  action(w0_writeDcache, "w0", desc="write data to dcache 0 (and l2)") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getL1CacheEntry(address, 0);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      DPRINTF(ProtocolTrace, "CP writeD0: address %s, data: %s\n", address, in_msg.DataBlk);
+      entry.DataBlk := in_msg.DataBlk;
+      entry.Dirty := in_msg.Dirty;
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  action(w1_writeDcache, "w1", desc="write data to dcache 1 (and l2)") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getL1CacheEntry(address, 1);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      entry.DataBlk := in_msg.DataBlk;
+      entry.Dirty := in_msg.Dirty;
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+    peek(responseToCore_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:StaleNotif;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(wb_data, "wb", desc="write back data") {
+    peek(responseToCore_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:CPUData;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.Dirty := tbe.Dirty;
+        if (tbe.Shared) {
+          out_msg.NbReqShared := true;
+        } else {
+          out_msg.NbReqShared := false;
+        }
+        out_msg.State := CoherenceState:Shared; // faux info
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.Dirty := false;
+      out_msg.Ntsl := true;
+      out_msg.Hit := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(ph_sendProbeResponseHit, "ph", desc="send probe ack PrbShrData, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      assert(addressInCore(address) || is_valid(tbe));
+      out_msg.Dirty := false;  // only true if sending back data i think
+      out_msg.Hit := true;
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pb_sendProbeResponseBackprobe, "pb", desc="send probe ack PrbShrData, no data, check for L1 residence") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      if (addressInCore(address)) {
+        out_msg.Hit := true;
+      } else {
+        out_msg.Hit := false;
+      }
+      out_msg.Dirty := false;  // not sending back data, so def. not dirty
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      assert(is_valid(cache_entry));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.DataBlk := cache_entry.DataBlk;
+      assert(cache_entry.Dirty);
+      out_msg.Dirty := true;
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      assert(is_valid(cache_entry));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.DataBlk := cache_entry.DataBlk;
+      assert(cache_entry.Dirty);
+      out_msg.Dirty := true;
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      assert(is_valid(tbe));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.DataBlk := tbe.DataBlk;
+      assert(tbe.Dirty);
+      out_msg.Dirty := true;
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  action(s_setSharedFlip, "s", desc="hit by shared probe, status may be different") {
+    assert(is_valid(tbe));
+    tbe.Shared := true;
+  }
+
+  action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+    enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(l2m_profileMiss, "l2m", desc="l2m miss profile") {
+    ++L2cache.demand_misses;
+  }
+
+  action(l10m_profileMiss, "l10m", desc="l10m miss profile") {
+    ++L1D0cache.demand_misses;
+  }
+
+  action(l11m_profileMiss, "l11m", desc="l11m miss profile") {
+    ++L1D1cache.demand_misses;
+  }
+
+  action(l1im_profileMiss, "l1lm", desc="l1im miss profile") {
+    ++L1Icache.demand_misses;
+  }
+
+  action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
+    probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(xx_recycleResponseQueue, "xx", desc="recycle response queue") {
+    responseToCore_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
+    mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  // END ACTIONS
+
+  // BEGIN TRANSITIONS
+
+  // transitions from base
+  transition(I, C0_Load_L1miss, I_E0S) {L1D0TagArrayRead, L2TagArrayRead} {
+    // track misses, if implemented
+    // since in I state, L2 miss as well
+    l2m_profileMiss;
+    l10m_profileMiss;
+    a0_allocateL1D;
+    a2_allocateL2;
+    i1_invCluster;
+    ii_invIcache;
+    n_issueRdBlk;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, C1_Load_L1miss, I_E1S) {L1D1TagArrayRead, L2TagArrayRead} {
+    // track misses, if implemented
+    // since in I state, L2 miss as well
+    l2m_profileMiss;
+    l11m_profileMiss;
+    a1_allocateL1D;
+    a2_allocateL2;
+    i0_invCluster;
+    ii_invIcache;
+    n_issueRdBlk;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, Ifetch0_L1miss, S0) {L1ITagArrayRead,L2TagArrayRead} {
+    // track misses, if implemented
+    // L2 miss as well
+    l2m_profileMiss;
+    l1im_profileMiss;
+    ai_allocateL1I;
+    a2_allocateL2;
+    ib_invBothClusters;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} {
+    // track misses, if implemented
+    // L2 miss as well
+    l2m_profileMiss;
+    l1im_profileMiss;
+    ai_allocateL1I;
+    a2_allocateL2;
+    ib_invBothClusters;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, C0_Store_L1miss, I_M0) {L1D0TagArrayRead, L2TagArrayRead} {
+    l2m_profileMiss;
+    l10m_profileMiss;
+    a0_allocateL1D;
+    a2_allocateL2;
+    i1_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, C1_Store_L1miss, I_M1) {L1D0TagArrayRead, L2TagArrayRead} {
+    l2m_profileMiss;
+    l11m_profileMiss;
+    a1_allocateL1D;
+    a2_allocateL2;
+    i0_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition(S, C0_Load_L1miss, S_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(S, C1_Load_L1miss, S_F1) {L1D1TagArrayRead,L2TagArrayRead, L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(S, Ifetch0_L1miss, Si_F0) {L1ITagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+    l1im_profileMiss;
+    ai_allocateL1I;
+    fi_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(S, Ifetch1_L1miss, Si_F1) {L1ITagArrayRead,L2TagArrayRead, L2DataArrayRead} {
+    l1im_profileMiss;
+    ai_allocateL1I;
+    fi_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition({S}, {C0_Store_L1hit, C0_Store_L1miss}, S_M0) {L1D0TagArrayRead, L2TagArrayRead} {
+    l2m_profileMiss;
+    l10m_profileMiss;
+    a0_allocateL1D;
+    mruD0_setD0cacheMRU;
+    i1_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition({S}, {C1_Store_L1hit, C1_Store_L1miss}, S_M1) {L1D1TagArrayRead, L2TagArrayRead} {
+    l2m_profileMiss;
+    l11m_profileMiss;
+    a1_allocateL1D;
+    mruD1_setD1cacheMRU;
+    i0_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition(Es, C0_Load_L1miss, Es_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {  // can this be folded with S_F?
+    a0_allocateL1D;
+    l10m_profileMiss;
+    f0_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(Es, C1_Load_L1miss, Es_F1) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {  // can this be folded with S_F?
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(Es, Ifetch0_L1miss, S0) {L1ITagArrayRead, L1ITagArrayWrite, L2TagArrayRead, L2TagArrayWrite} {
+    l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    ib_invBothClusters;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(Es, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} {
+    l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    ib_invBothClusters;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  // THES SHOULD NOT BE INSTANTANEOUS BUT OH WELL FOR NOW
+  transition(Es, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+    a0_allocateL1D;
+    i1_invCluster;
+    s0_storeDone;   // instantaneous L1/L2 dirty - no writethrough delay
+    mruD0_setD0cacheMRU;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(Es, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+    a1_allocateL1D;
+    i0_invCluster;
+    s1_storeDone;
+    mruD1_setD1cacheMRU;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, C0_Load_L1miss, E0_F) {L1D0TagArrayRead,L2TagArrayRead, L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, C1_Load_L1miss, E0_Es) {L1D1TagArrayRead,  L2TagArrayRead, L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, Ifetch0_L1miss, S0) {L2TagArrayRead, L1ITagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkS
+    l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    i0_invCluster;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkS
+    l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    i0_invCluster;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+    a0_allocateL1D;
+    s0_storeDone;
+    mruD0_setD0cacheMRU;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, C1_Store_L1miss, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1TagArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    i0_invCluster;
+    s1_storeDone;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, C1_Load_L1miss, E1_F) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+     l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, C0_Load_L1miss, E1_Es) {L1D0TagArrayRead,  L2TagArrayRead, L2DataArrayRead} {
+    l11m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, Ifetch1_L1miss, S1) {L2TagArrayRead,  L1ITagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkS
+    l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    i1_invCluster;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, Ifetch0_L1miss, S0) {L2TagArrayRead, L1ITagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkS
+    l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    i1_invCluster;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite} {
+    a1_allocateL1D;
+    s1_storeDone;
+    mruD1_setD1cacheMRU;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, C0_Store_L1miss, M0) {L1D0TagArrayRead, L2TagArrayRead, L2TagArrayWrite, L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite} {
+     l10m_profileMiss;
+    a0_allocateL1D;
+    i1_invCluster;
+    s0_storeDone;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition({O}, {C0_Store_L1hit, C0_Store_L1miss}, O_M0) {L1D0TagArrayRead,L2TagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue CtoD
+    l10m_profileMiss;
+    a0_allocateL1D;
+    mruD0_setD0cacheMRU;
+    i1_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition({O}, {C1_Store_L1hit, C1_Store_L1miss}, O_M1) {L1D1TagArrayRead, L2TagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkS
+     l11m_profileMiss;
+    a1_allocateL1D;
+    mruD1_setD1cacheMRU;
+    i0_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition(O, C0_Load_L1miss, O_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(O, C1_Load_L1miss, O_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} {
+     l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(Ms, C0_Load_L1miss, Ms_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(Ms, C1_Load_L1miss, Ms_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition({Ms, M0, M1, O}, Ifetch0_L1miss, MO_S0) {L1ITagArrayRead, L2DataArrayRead, L2TagArrayRead} {
+    l2m_profileMiss;  // permissions miss
+    l1im_profileMiss;
+    ai_allocateL1I;
+    t_allocateTBE;
+    ib_invBothClusters;
+    vd_victim;
+//    i2_invL2;
+    p_popMandatoryQueue;
+  }
+
+  transition({Ms, M0, M1, O}, Ifetch1_L1miss, MO_S1) {L1ITagArrayRead, L2TagArrayRead, L2DataArrayRead } {
+    l2m_profileMiss;  // permissions miss
+     l1im_profileMiss;
+    ai_allocateL1I;
+    t_allocateTBE;
+    ib_invBothClusters;
+    vd_victim;
+//    i2_invL2;
+    p_popMandatoryQueue;
+  }
+
+  transition(Ms, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+    a0_allocateL1D;
+    i1_invCluster;
+    s0_storeDone;
+    mruD0_setD0cacheMRU;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(Ms, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+    a1_allocateL1D;
+    i0_invCluster;
+    s1_storeDone;
+    mruD1_setD1cacheMRU;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(M0, C0_Load_L1miss, M0_F) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+     l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(M0, C1_Load_L1miss, M0_Ms) {L2TagArrayRead, L2DataArrayRead,L1D0TagArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(M0, {C0_Store_L1hit, C0_Store_L1miss}) {L1D0TagArrayRead,L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayRead} {
+    a0_allocateL1D;
+    s0_storeDone;
+    mruD0_setD0cacheMRU;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(M0, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayRead, L2TagArrayWrite} {
+    a1_allocateL1D;
+    i0_invCluster;
+    s1_storeDone;
+    mruD1_setD1cacheMRU;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(M1, C0_Load_L1miss, M1_Ms) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(M1, C1_Load_L1miss, M1_F) {L1D1TagArrayRead,L2TagArrayRead, L2DataArrayRead} {
+    a1_allocateL1D;
+    f1_L2ToL1;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(M1, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+    a0_allocateL1D;
+    i1_invCluster;
+    s0_storeDone;
+    mruD0_setD0cacheMRU;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(M1, {C1_Store_L1hit, C1_Store_L1miss}) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayWrite} {
+    a1_allocateL1D;
+    s1_storeDone;
+    mruD1_setD1cacheMRU;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  // end transitions from base
+
+  // Begin simple hit transitions
+  transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es,
+          Ms_F1, M0_Ms}, C0_Load_L1hit) {L1D0TagArrayRead, L1D0DataArrayRead} {
+    // track hits, if implemented
+    l0_loadDone;
+    mruD0_setD0cacheMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es,
+          Ms_F0, M1_Ms}, C1_Load_L1hit) {L1D1TagArrayRead, L1D1DataArrayRead} {
+    // track hits, if implemented
+    l1_loadDone;
+    mruD1_setD1cacheMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition({S, S_C, S_F0, S_F1, S_F}, Ifetch0_L1hit) {L1ITagArrayRead, L1IDataArrayRead} {
+    // track hits, if implemented
+    il0_loadDone;
+    mruI_setIcacheMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition({S, S_C, S_F0, S_F1, S_F}, Ifetch1_L1hit) {L1ITagArrayRead, L1IDataArrayWrite} {
+    // track hits, if implemented
+    il1_loadDone;
+    mruI_setIcacheMRU;
+    p_popMandatoryQueue;
+  }
+
+  // end simple hit transitions
+
+  // Transitions from transient states
+
+  // recycles
+  transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+          IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F,
+          E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, C0_Load_L1hit) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M1,
+          O_M1, S0, S1, I_C, S0_C, S1_C, S_C}, C0_Load_L1miss) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+          IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F,
+          E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, C1_Load_L1hit) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M0,
+          O_M0, S0, S1, I_C, S0_C, S1_C, S_C},  C1_Load_L1miss) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, {Ifetch0_L1hit, Ifetch1_L1hit}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M0, I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES,
+          IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, ES_I, MO_I, S_F0, S_F1, S_F,
+          O_F0, O_F1, O_F, S_M0, S_M1, O_M0, O_M1, Es_F0, Es_F1, Es_F, E0_F,
+          E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_F, M1_Ms, I_C,
+          S_C}, {Ifetch0_L1miss, Ifetch1_L1miss}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_E1S, IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, S_F1, O_F1,
+          Si_F0, Si_F1, S_M1, O_M1, S0, S1, Es_F1, E1_F, E0_Es, Ms_F1, M0_Ms,
+          M1_F, I_C, S0_C, S1_C, S_C}, {C0_Store_L1miss}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_E0S, IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1 S_F0, O_F0,
+          Si_F0, Si_F1, S_M0, O_M0, S0, S1, Es_F0, E0_F, E1_Es, Ms_F0, M0_F,
+          M1_Ms, I_C, S0_C, S1_C, S_C}, {C1_Store_L1miss}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+          IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M0, O_M0, Es_F0, Es_F1, Es_F, E0_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_Ms}, {C0_Store_L1hit}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+          IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M1,
+          O_M1, Es_F0, Es_F1, Es_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F,
+          M0_Ms, M1_F, M1_Ms}, {C1_Store_L1hit}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+          IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F,
+          E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, L1D0_Repl) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+          IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F,
+          E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, L1D1_Repl) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, L1I_Repl) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({S_C, S0_C, S1_C, S0, S1, Si_F0, Si_F1, I_M0, I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, S_M0, O_M0, S_M1, O_M1, Es_F0, Es_F1, Es_F, E0_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_F, M1_Ms, MO_S0, MO_S1, IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, L2_Repl) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, {NB_AckS,
+          PrbInvData, PrbInv, PrbShrData}) {} {
+    yy_recycleProbeQueue;  // these should be resolved soon, but I didn't want to add more states, though technically they could be solved now, and probes really could be solved but i don't think it's really necessary.
+  }
+
+  transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES}, NB_AckE) {} {
+    xx_recycleResponseQueue;  // these should be resolved soon, but I didn't want to add more states, though technically they could be solved now, and probes really could be solved but i don't think it's really necessary.
+  }
+
+  transition({E0_Es, E1_F, Es_F1}, C0_Load_L1miss, Es_F) {L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(S_F1, C0_Load_L1miss, S_F) {L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(O_F1, C0_Load_L1miss, O_F) {L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition({Ms_F1, M0_Ms, M1_F}, C0_Load_L1miss, Ms_F) {L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_M0, C1_Load_L1miss, I_M0Ms) {} {
+    l2m_profileMiss;
+    l11m_profileMiss;
+    a1_allocateL1D;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_M1, C0_Load_L1miss, I_M1Ms) {} {
+    l2m_profileMiss;
+    l10m_profileMiss;
+    a0_allocateL1D;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_M0, C1_Store_L1miss, I_M0M1) {} {
+    l2m_profileMiss;
+    l11m_profileMiss;
+    a1_allocateL1D;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_M1, C0_Store_L1miss, I_M1M0) {} {
+    l2m_profileMiss;
+    l10m_profileMiss;
+    a0_allocateL1D;
+    mru_setMRU;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_E0S, C1_Load_L1miss, I_ES) {} {
+    l2m_profileMiss;
+    l11m_profileMiss;
+    a1_allocateL1D;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_E1S, C0_Load_L1miss, I_ES) {} {
+    l2m_profileMiss;
+    l10m_profileMiss;
+    a0_allocateL1D;
+    p_popMandatoryQueue;
+  }
+
+  transition({E1_Es, E0_F, Es_F0}, C1_Load_L1miss, Es_F) {L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(S_F0, C1_Load_L1miss, S_F) {L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(O_F0, C1_Load_L1miss, O_F) {L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition({Ms_F0, M1_Ms, M0_F}, C1_Load_L1miss, Ms_F) { L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es, Ms_F1, M0_Ms}, L1D0_Repl) {L1D0TagArrayRead} {
+    i0_invCluster;
+  }
+
+  transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es, Ms_F0, M1_Ms}, L1D1_Repl) {L1D1TagArrayRead} {
+    i1_invCluster;
+  }
+
+  transition({S, S_C, S_F0, S_F1}, L1I_Repl) {L1ITagArrayRead} {
+    ii_invIcache;
+  }
+
+  transition({S, E0, E1, Es}, L2_Repl, ES_I) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead, L1D1TagArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    t_allocateTBE;
+    vc_victim;
+    ib_invBothClusters;
+    i2_invL2;
+    ii_invIcache;
+  }
+
+  transition({Ms, M0, M1, O}, L2_Repl, MO_I) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead, L1D1TagArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    t_allocateTBE;
+    vd_victim;
+    i2_invL2;
+    ib_invBothClusters;  // nothing will happen for D0 on M1, vice versa
+  }
+
+  transition(S0, NB_AckS, S) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    wi_writeIcache;
+    xi0_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(S1, NB_AckS, S) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    wi_writeIcache;
+    xi1_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(S0_C, NB_AckS, S_C) {L1D0DataArrayWrite,L2DataArrayWrite} {
+    wi_writeIcache;
+    xi0_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(S1_C, NB_AckS, S_C) {L1D1DataArrayWrite, L2DataArrayWrite} {
+    wi_writeIcache;
+    xi1_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M0, NB_AckM, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} {
+    w0_writeDcache;
+    xs0_storeDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    w1_writeDcache;
+    xs1_storeDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  // THESE MO->M1 should not be instantaneous but oh well for now.
+  transition(I_M0M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    w0_writeDcache;
+    xs0_storeDone;
+    uu_sendUnblock;
+    i0_invCluster;
+    s1_storeDone;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M1M0, NB_AckM, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    w1_writeDcache;
+    xs1_storeDone;
+    uu_sendUnblock;
+    i1_invCluster;
+    s0_storeDone;
+    pr_popResponseQueue;
+  }
+
+  // Above shoudl be more like this, which has some latency to xfer to L1
+  transition(I_M0Ms, NB_AckM, M0_Ms) {L1D0DataArrayWrite,L2DataArrayWrite} {
+    w0_writeDcache;
+    xs0_storeDone;
+    uu_sendUnblock;
+    f1_L2ToL1;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M1Ms, NB_AckM, M1_Ms) {L1D1DataArrayWrite, L2DataArrayWrite} {
+    w1_writeDcache;
+    xs1_storeDone;
+    uu_sendUnblock;
+    f0_L2ToL1;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E0S, NB_AckE, E0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    w0_writeDcache;
+    xl0_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E1S, NB_AckE, E1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    w1_writeDcache;
+    xl1_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_ES, NB_AckE, Es) {L1D1DataArrayWrite, L1D1TagArrayWrite, L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite } {
+    w0_writeDcache;
+    xl0_loadDone;
+    w1_writeDcache;
+    xl1_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E0S, NB_AckS, S) {L1D0DataArrayWrite, L1D0TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} {
+    w0_writeDcache;
+    xl0_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E1S, NB_AckS, S) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} {
+    w1_writeDcache;
+    xl1_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_ES, NB_AckS, S) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite,  L2DataArrayWrite} {
+    w0_writeDcache;
+    xl0_loadDone;
+    w1_writeDcache;
+    xl1_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(S_F0, L2_to_L1D0, S) {L1D0TagArrayWrite, L1D0DataArrayWrite,  L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(S_F1, L2_to_L1D1, S) {L1D1TagArrayWrite, L1D1DataArrayWrite,  L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Si_F0, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite,  L2TagArrayWrite, L2DataArrayRead} {
+    ci_copyL2ToL1;
+    mru_setMRU;
+    il0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Si_F1, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite,  L2TagArrayWrite, L2DataArrayRead} {
+    ci_copyL2ToL1;
+    mru_setMRU;
+    il1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(S_F, L2_to_L1D0, S_F1) { L1D0DataArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(S_F, L2_to_L1D1, S_F0) { L1D1DataArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(O_F0, L2_to_L1D0, O) { L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(O_F1, L2_to_L1D1, O) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(O_F, L2_to_L1D0, O_F1) { L1D0DataArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(O_F, L2_to_L1D1, O_F0) { L1D1DataArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(M1_F, L2_to_L1D1, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(M0_F, L2_to_L1D0, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Ms_F0, L2_to_L1D0, Ms) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Ms_F1, L2_to_L1D1, Ms) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Ms_F, L2_to_L1D0, Ms_F1) {L1D0DataArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Ms_F, L2_to_L1D1, Ms_F0) {L1IDataArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(M1_Ms, L2_to_L1D0, Ms) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(M0_Ms, L2_to_L1D1, Ms) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Es_F0, L2_to_L1D0, Es) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Es_F1, L2_to_L1D1, Es) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Es_F, L2_to_L1D0, Es_F1) {L2TagArrayRead, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Es_F, L2_to_L1D1, Es_F0) {L2TagArrayRead, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(E0_F, L2_to_L1D0, E0) {L2TagArrayRead, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(E1_F, L2_to_L1D1, E1) {L2TagArrayRead, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(E1_Es, L2_to_L1D0, Es) {L2TagArrayRead, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    mru_setMRU;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(E0_Es, L2_to_L1D1, Es) {L2TagArrayRead, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    mru_setMRU;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(IF_E0S, L2_to_L1D0, I_E0S) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(IF_E1S, L2_to_L1D1, I_E1S) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(IF_ES, L2_to_L1D0, IF1_ES) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(IF_ES, L2_to_L1D1, IF0_ES) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(IF0_ES, L2_to_L1D0, I_ES) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(IF1_ES, L2_to_L1D1, I_ES) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(F_S0, L2_to_L1I, S0) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(F_S1, L2_to_L1I, S1) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition({S_M0, O_M0}, NB_AckM, M0) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    mru_setMRU;
+    xs0_storeDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition({S_M1, O_M1}, NB_AckM, M1) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    mru_setMRU;
+    xs1_storeDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(MO_I, NB_AckWB, I) {L2TagArrayWrite} {
+    wb_data;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(ES_I, NB_AckWB, I) {L2TagArrayWrite} {
+    wb_data;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(MO_S0, NB_AckWB, S0) {L2TagArrayWrite} {
+    wb_data;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE; // FOO
+    nS_issueRdBlkS;
+    pr_popResponseQueue;
+  }
+
+  transition(MO_S1, NB_AckWB, S1) {L2TagArrayWrite} {
+    wb_data;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE; // FOO
+    nS_issueRdBlkS;
+    pr_popResponseQueue;
+  }
+
+  // Writeback cancel "ack"
+  transition(I_C, NB_AckWB, I) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(S0_C, NB_AckWB, S0) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    pr_popResponseQueue;
+  }
+
+  transition(S1_C, NB_AckWB, S1) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    pr_popResponseQueue;
+  }
+
+  transition(S_C, NB_AckWB, S) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    pr_popResponseQueue;
+  }
+
+  // Begin Probe Transitions
+
+  transition({Ms, M0, M1, O}, PrbInvData, I) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    i2_invL2;
+    ib_invBothClusters;
+    pp_popProbeQueue;
+  }
+
+  transition({Es, E0, E1, S, I}, PrbInvData, I) {L2TagArrayRead, L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    ib_invBothClusters;
+    ii_invIcache;  // only relevant for S
+    pp_popProbeQueue;
+  }
+
+  transition(S_C, PrbInvData, I_C) {L2TagArrayWrite} {
+    t_allocateTBE;
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbInvData, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms, M0, M1, O, Es, E0, E1, S, I}, PrbInv, I) {L2TagArrayRead, L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2; // nothing will happen in I
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(S_C, PrbInv, I_C) {L2TagArrayWrite} {
+    t_allocateTBE;
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbInv, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms, M0, M1, O}, PrbShrData, O) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({Es, E0, E1, S}, PrbShrData, S) {L2TagArrayRead, L2TagArrayWrite} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition(S_C, PrbShrData) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({I, I_C}, PrbShrData) {L2TagArrayRead} {
+    pb_sendProbeResponseBackprobe;
+    pp_popProbeQueue;
+  }
+
+  transition({I_M0, I_E0S}, {PrbInv, PrbInvData}) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;  // must invalidate current data (only relevant for I_M0)
+    a0_allocateL1D;  // but make sure there is room for incoming data when it arrives
+    pp_popProbeQueue;
+  }
+
+  transition({I_M1, I_E1S}, {PrbInv, PrbInvData}) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters; // must invalidate current data (only relevant for I_M1)
+    a1_allocateL1D;  // but make sure there is room for incoming data when it arrives
+    pp_popProbeQueue;
+  }
+
+  transition({I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_ES}, {PrbInv, PrbInvData, PrbShrData}) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    a0_allocateL1D;
+    a1_allocateL1D;
+    pp_popProbeQueue;
+  }
+
+  transition({I_M0, I_E0S, I_M1, I_E1S}, PrbShrData) {} {
+    pb_sendProbeResponseBackprobe;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, PrbInvData, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInvData, I_C) {} {
+    pdt_sendProbeResponseDataFromTBE;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInv, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, PrbInv, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, PrbShrData, ES_I) {} {
+    ph_sendProbeResponseHit;
+    s_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbShrData, MO_I) {} {
+    pdt_sendProbeResponseDataFromTBE;
+    s_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S0, PrbInvData, S0_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdt_sendProbeResponseDataFromTBE;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S1, PrbInvData, S1_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdt_sendProbeResponseDataFromTBE;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S0, PrbInv, S0_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S1, PrbInv, S1_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    a2_allocateL2;
+    d_deallocateTBE;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition({MO_S0, MO_S1}, PrbShrData) {} {
+    pdt_sendProbeResponseDataFromTBE;
+    s_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F0, Es_F0, E0_F, E1_Es}, {PrbInvData, PrbInv}, IF_E0S) {}{
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    // invalidate everything you've got
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    // but make sure you have room for what you need from the fill
+    a0_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F1, Es_F1, E1_F, E0_Es}, {PrbInvData, PrbInv}, IF_E1S) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    // invalidate everything you've got
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    // but make sure you have room for what you need from the fill
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F, Es_F}, {PrbInvData, PrbInv}, IF_ES) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    // invalidate everything you've got
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    // but make sure you have room for what you need from the fill
+    a0_allocateL1D;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition(Si_F0, {PrbInvData, PrbInv}, F_S0) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition(Si_F1, {PrbInvData, PrbInv}, F_S1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition({Es_F0, E0_F, E1_Es}, PrbShrData, S_F0) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({Es_F1, E1_F, E0_Es}, PrbShrData, S_F1) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition(Es_F, PrbShrData, S_F) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F0, S_F1, S_F, Si_F0, Si_F1}, PrbShrData) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition(S_M0, PrbInvData, I_M0) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition(O_M0, PrbInvData, I_M0) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdm_sendProbeResponseDataMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S_M0, O_M0}, {PrbInv}, I_M0) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition(S_M1, PrbInvData, I_M1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition(O_M1, PrbInvData, I_M1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdm_sendProbeResponseDataMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S_M1, O_M1}, {PrbInv}, I_M1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S0, S0_C}, {PrbInvData, PrbInv}) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S1, S1_C}, {PrbInvData, PrbInv}) {}  {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S_M0, S_M1}, PrbShrData) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({O_M0, O_M1}, PrbShrData) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({S0, S1, S0_C, S1_C}, PrbShrData) {} {
+    pb_sendProbeResponseBackprobe;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F0, M0_F, M1_Ms, O_F0}, PrbInvData, IF_E0S) { L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F1, M1_F, M0_Ms, O_F1}, PrbInvData, IF_E1S) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    ib_invBothClusters;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F, O_F}, PrbInvData, IF_ES) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F0, M0_F, M1_Ms, O_F0}, PrbInv, IF_E0S) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F1, M1_F, M0_Ms, O_F1}, PrbInv, IF_E1S) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F, O_F}, PrbInv, IF_ES) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F0, M0_F, M1_Ms}, PrbShrData, O_F0) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F1, M1_F, M0_Ms}, PrbShrData, O_F1) {} {
+  }
+
+  transition({Ms_F}, PrbShrData, O_F) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({O_F0, O_F1, O_F}, PrbShrData) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  // END TRANSITIONS
+}
+
+
diff --git a/src/mem/protocol/MOESI_AMD_Base-L3cache.sm b/src/mem/protocol/MOESI_AMD_Base-L3cache.sm
new file mode 100644
index 000000000..479cf4e78
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-L3cache.sm
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:L3Cache, "L3")
+ : CacheMemory * L3cache;
+   WireBuffer * reqToDir;
+   WireBuffer * respToDir;
+   WireBuffer * l3UnblockToDir;
+   WireBuffer * reqToL3;
+   WireBuffer * probeToL3;
+   WireBuffer * respToL3;
+   Cycles l3_request_latency := 1;
+   Cycles l3_response_latency := 35;
+
+  // To the general response network
+  MessageBuffer * responseFromL3, network="To", virtual_network="2", ordered="false", vnet_type="response";
+
+  // From the general response network
+  MessageBuffer * responseToL3, network="From", virtual_network="2", ordered="false", vnet_type="response";
+
+{
+  // EVENTS
+  enumeration(Event, desc="L3 Events") {
+    // Requests coming from the Cores
+    RdBlk,                  desc="CPU RdBlk event";
+    RdBlkM,                 desc="CPU RdBlkM event";
+    RdBlkS,                 desc="CPU RdBlkS event";
+    CtoD,                   desc="Change to Dirty request";
+    WrVicBlk,               desc="L2 Victim (dirty)";
+    WrVicBlkShared,               desc="L2 Victim (dirty)";
+    ClVicBlk,               desc="L2 Victim (clean)";
+    ClVicBlkShared,               desc="L2 Victim (clean)";
+
+    CPUData,                      desc="WB data from CPU";
+    CPUDataShared,                desc="WB data from CPU, NBReqShared 1";
+    StaleWB,                desc="WB stale; no data";
+
+    L3_Repl,             desc="L3 Replacement";
+
+    // Probes
+    PrbInvData,         desc="Invalidating probe, return dirty data";
+    PrbInv,             desc="Invalidating probe, no need to return data";
+    PrbShrData,         desc="Downgrading probe, return data";
+
+    // Coming from Memory Controller
+    WBAck,                     desc="ack from memory";
+
+    CancelWB,                   desc="Cancel WB from L2";
+  }
+
+  // STATES
+  // Base States:
+  state_declaration(State, desc="L3 State", default="L3Cache_State_I") {
+    M, AccessPermission:Read_Write, desc="Modified";  // No other cache has copy, memory stale
+    O, AccessPermission:Read_Only, desc="Owned";     // Correct most recent copy, others may exist in S
+    E, AccessPermission:Read_Write, desc="Exclusive"; // Correct, most recent, and only copy (and == Memory)
+    S, AccessPermission:Read_Only, desc="Shared";    // Correct, most recent. If no one in O, then == Memory
+    I, AccessPermission:Invalid, desc="Invalid";
+
+    I_M, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data";
+    I_O, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data";
+    I_E, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data";
+    I_S, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data";
+    S_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to M";
+    S_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O";
+    S_E, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to E";
+    S_S, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to S";
+    E_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O";
+    E_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O";
+    E_E, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O";
+    E_S, AccessPermission:Busy, desc="Shared, received WrVicBlk, sent Ack, waiting for Data";
+    O_M, AccessPermission:Busy, desc="...";
+    O_O, AccessPermission:Busy, desc="...";
+    O_E, AccessPermission:Busy, desc="...";
+    O_S, AccessPermission:Busy, desc="...";
+    M_M, AccessPermission:Busy, desc="...";
+    M_O, AccessPermission:Busy, desc="...";
+    M_E, AccessPermission:Busy, desc="...";
+    M_S, AccessPermission:Busy, desc="...";
+    D_I, AccessPermission:Invalid,  desc="drop WB data on the floor when receive";
+    MOD_I, AccessPermission:Busy, desc="drop WB data on the floor, waiting for WBAck from Mem";
+    MO_I, AccessPermission:Busy, desc="M or O, received L3_Repl, waiting for WBAck from Mem";
+    I_I, AccessPermission:Busy, desc="I_MO received L3_Repl";
+    I_CD, AccessPermission:Busy, desc="I_I received WBAck, now just waiting for CPUData";
+    I_C, AccessPermission:Invalid, desc="sent cancel, just waiting to receive mem wb ack so nothing gets confused";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead,    desc="Read the data array";
+    DataArrayWrite,   desc="Write the data array";
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+  }
+
+  // STRUCTURES
+
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState,           desc="cache state";
+    bool Dirty,                 desc="Is the data dirty (diff from memory?)";
+    DataBlock DataBlk,          desc="Data for the block";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,     desc="Transient state";
+    DataBlock DataBlk,  desc="data for the block";
+    bool Dirty,         desc="Is the data dirty?";
+    bool Shared,        desc="Victim hit by shared probe";
+    MachineID From,     desc="Waiting for writeback from...";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<L3Cache_TBE>", constructor="m_number_of_TBEs";
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+
+
+  // FUNCTION DEFINITIONS
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", L3cache.lookup(addr));
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    return getCacheEntry(addr).DataBlk;
+  }
+
+  bool presentOrAvail(Addr addr) {
+    return L3cache.isTagPresent(addr) || L3cache.cacheAvail(addr);
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+        tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+        cache_entry.CacheState := state;
+    }
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes +
+        functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return L3Cache_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return L3Cache_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(L3Cache_State_to_permission(state));
+    }
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    return true;
+  }
+
+
+  // OUT PORTS
+  out_port(requestNetwork_out, CPURequestMsg, reqToDir);
+  out_port(L3Resp_out, ResponseMsg, respToDir);
+  out_port(responseNetwork_out, ResponseMsg, responseFromL3);
+  out_port(unblockNetwork_out, UnblockMsg, l3UnblockToDir);
+
+  // IN PORTS
+  in_port(NBResponse_in, ResponseMsg, respToL3) {
+    if (NBResponse_in.isReady(clockEdge())) {
+      peek(NBResponse_in, ResponseMsg) {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+          trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
+        } else {
+          DPRINTF(RubySlicc, "%s\n", in_msg);
+          error("Error on NBResponse Type");
+        }
+      }
+    }
+  }
+
+  // Response Network
+  in_port(responseNetwork_in, ResponseMsg, responseToL3) {
+    if (responseNetwork_in.isReady(clockEdge())) {
+      peek(responseNetwork_in, ResponseMsg) {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:CPUData) {
+          if (in_msg.NbReqShared) {
+            trigger(Event:CPUDataShared, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:CPUData, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
+            trigger(Event:StaleWB, in_msg.addr, cache_entry, tbe);
+        } else {
+          DPRINTF(RubySlicc, "%s\n", in_msg);
+          error("Error on NBResponse Type");
+        }
+      }
+    }
+  }
+
+  // probe network
+  in_port(probeNetwork_in, NBProbeRequestMsg, probeToL3) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, NBProbeRequestMsg) {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        if (in_msg.Type == ProbeRequestType:PrbInv) {
+          if (in_msg.ReturnData) {
+            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+          if (in_msg.ReturnData) {
+            trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+          } else {
+            error("Don't think I should get any of these");
+          }
+        }
+      }
+    }
+  }
+
+  // Request Network
+  in_port(requestNetwork_in, CPURequestMsg, reqToL3) {
+    if (requestNetwork_in.isReady(clockEdge())) {
+      peek(requestNetwork_in, CPURequestMsg) {
+        assert(in_msg.Destination.isElement(machineID));
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        if (in_msg.Type == CoherenceRequestType:RdBlk) {
+          trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+          trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+          trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+          if (presentOrAvail(in_msg.addr)) {
+            if (in_msg.Shared) {
+              trigger(Event:ClVicBlkShared, in_msg.addr, cache_entry, tbe);
+            } else {
+              trigger(Event:ClVicBlk, in_msg.addr, cache_entry, tbe);
+            }
+          } else {
+            Addr victim :=  L3cache.cacheProbe(in_msg.addr);
+            trigger(Event:L3_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+          if (presentOrAvail(in_msg.addr)) {
+            if (in_msg.Shared) {
+              trigger(Event:WrVicBlkShared, in_msg.addr, cache_entry, tbe);
+            } else {
+              trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
+            }
+          } else {
+            Addr victim := L3cache.cacheProbe(in_msg.addr);
+            trigger(Event:L3_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+          }
+        } else if (in_msg.Type == CoherenceRequestType:WrCancel) {
+          if (is_valid(tbe) && tbe.From == in_msg.Requestor) {
+            trigger(Event:CancelWB, in_msg.addr, cache_entry, tbe);
+          } else {
+            requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+          }
+        }
+      }
+    }
+  }
+
+  // BEGIN ACTIONS
+
+  action(i_invL3, "i", desc="invalidate L3 cache block") {
+    if (is_valid(cache_entry)) {
+        L3cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  action(rm_sendResponseM, "rm", desc="send Modified response") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, l3_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := cache_entry.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.Dirty := cache_entry.Dirty;
+        out_msg.State := CoherenceState:Modified;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(rs_sendResponseS, "rs", desc="send Shared response") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, l3_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := cache_entry.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.Dirty := cache_entry.Dirty;
+        out_msg.State := CoherenceState:Shared;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+
+  action(r_requestToMem, "r", desc="Miss in L3, pass on") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(requestNetwork_out, CPURequestMsg, l3_request_latency) {
+        out_msg.addr := address;
+        out_msg.Type := in_msg.Type;
+        out_msg.Requestor := in_msg.Requestor;
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.Shared := false; // unneeded for this request
+        out_msg.MessageSize := in_msg.MessageSize;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+    if (is_valid(cache_entry)) {
+      tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
+      tbe.Dirty := cache_entry.Dirty;
+    }
+    tbe.From := machineID;
+  }
+
+  action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(vd_vicDirty, "vd", desc="Victimize dirty L3 data") {
+    enqueue(requestNetwork_out, CPURequestMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:VicDirty;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+    }
+  }
+
+  action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, l3_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysWBAck;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+      }
+    }
+  }
+
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(ph_sendProbeResponseHit, "ph", desc="send probe ack, no data") {
+    enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.Dirty := false;
+      out_msg.Hit := true;
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pm_sendProbeResponseMiss, "pm", desc="send probe ack, no data") {
+    enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+    enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.DataBlk := cache_entry.DataBlk;
+      assert(cache_entry.Dirty);
+      out_msg.Dirty := true;
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+    }
+  }
+
+  action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") {
+    enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.DataBlk := tbe.DataBlk;
+      assert(tbe.Dirty);
+      out_msg.Dirty := true;
+      out_msg.Hit := true;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.State := CoherenceState:NA;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(mc_cancelMemWriteback, "mc", desc="send writeback cancel to memory") {
+    enqueue(requestNetwork_out, CPURequestMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:WrCancel;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+    }
+  }
+
+  action(a_allocateBlock, "a", desc="allocate L3 block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L3cache.allocate(address, new Entry));
+    }
+  }
+
+  action(d_writeData, "d", desc="write data to L3") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (in_msg.Dirty) {
+        cache_entry.Dirty := in_msg.Dirty;
+      }
+      cache_entry.DataBlk := in_msg.DataBlk;
+      DPRINTF(RubySlicc, "Writing to L3: %s\n", in_msg);
+    }
+  }
+
+  action(rd_copyDataFromRequest, "rd", desc="write data to L3") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := true;
+    }
+  }
+
+  action(f_setFrom, "f", desc="set who WB is expected to come from") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      tbe.From := in_msg.Requestor;
+    }
+  }
+
+  action(rf_resetFrom, "rf", desc="reset From") {
+    tbe.From := machineID;
+  }
+
+  action(wb_data, "wb", desc="write back data") {
+    enqueue(L3Resp_out, ResponseMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUData;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.Dirty := tbe.Dirty;
+      if (tbe.Shared) {
+        out_msg.NbReqShared := true;
+      } else {
+        out_msg.NbReqShared := false;
+      }
+      out_msg.State := CoherenceState:Shared; // faux info
+      out_msg.MessageSize := MessageSizeType:Writeback_Data;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(wt_writeDataToTBE, "wt", desc="write WB data to TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      tbe.DataBlk := in_msg.DataBlk;
+      tbe.Dirty := in_msg.Dirty;
+    }
+  }
+
+  action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+    enqueue(unblockNetwork_out, UnblockMsg, l3_request_latency) {
+      out_msg.addr := address;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
+    L3cache.setMRU(address);
+  }
+
+  action(p_popRequestQueue, "p", desc="pop request queue") {
+    requestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="pop response queue") {
+    responseNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pn_popNBResponseQueue, "pn", desc="pop NB response queue") {
+    NBResponse_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  action(zz_recycleRequestQueue, "\z", desc="recycle request queue") {
+    requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+
+  // END ACTIONS
+
+  // BEGIN TRANSITIONS
+
+  // transitions from base
+
+  transition({I, I_C}, {RdBlk, RdBlkS, RdBlkM, CtoD}) {TagArrayRead} {
+    r_requestToMem;
+    p_popRequestQueue;
+  }
+
+  transition(O, RdBlk ) {TagArrayRead, DataArrayRead} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+  transition(M, RdBlk, O) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+
+  transition(S, RdBlk) {TagArrayRead, DataArrayRead} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+  transition(E, RdBlk, S) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+
+  transition({M, O}, RdBlkS, O) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+
+  transition({E, S}, RdBlkS, S) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+    rs_sendResponseS;
+    ut_updateTag;
+    p_popRequestQueue;
+  }
+
+  transition(M, RdBlkM, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    rm_sendResponseM;
+    i_invL3;
+    p_popRequestQueue;
+  }
+
+  transition({O, S}, {RdBlkM, CtoD}) {TagArrayRead} {
+    r_requestToMem;  // can't handle this, just forward
+    p_popRequestQueue;
+  }
+
+  transition(E, RdBlkM, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    rm_sendResponseM;
+    i_invL3;
+    p_popRequestQueue;
+  }
+
+  transition({I}, WrVicBlk, I_M) {TagArrayRead, TagArrayWrite} {
+    a_allocateBlock;
+    t_allocateTBE;
+    f_setFrom;
+//    rd_copyDataFromRequest;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(I_C, {WrVicBlk, WrVicBlkShared, ClVicBlk, ClVicBlkShared}) {} {
+    zz_recycleRequestQueue;
+  }
+
+  transition({I}, WrVicBlkShared, I_O) {TagArrayRead, TagArrayWrite} {
+    a_allocateBlock;
+    t_allocateTBE;
+    f_setFrom;
+//    rd_copyDataFromRequest;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(S, WrVicBlkShared, S_O) {TagArrayRead, TagArrayWrite} {
+//    rd_copyDataFromRequest;
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(S, WrVicBlk, S_M) {TagArrayRead, TagArrayWrite} { // should be technically not possible, but assume the data comes back with shared bit flipped
+//    rd_copyDataFromRequest;
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(E, WrVicBlk, E_M) {TagArrayRead, TagArrayWrite}  {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(E, WrVicBlkShared, E_O) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(O, WrVicBlk, O_M) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(O, WrVicBlkShared, O_O) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(M, WrVicBlk, M_M) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(M, WrVicBlkShared, M_O) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition({I}, ClVicBlk, I_E) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    a_allocateBlock;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition({I}, ClVicBlkShared, I_S) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    a_allocateBlock;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(S, ClVicBlk, S_E) {TagArrayRead, TagArrayWrite} { // technically impossible, assume data comes back with shared bit flipped
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(S, ClVicBlkShared, S_S) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(E, ClVicBlk, E_E) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(E, ClVicBlkShared, E_S) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(O, ClVicBlk, O_E) {TagArrayRead, TagArrayWrite} { // technically impossible, but assume data comes back with shared bit flipped
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(O, ClVicBlkShared, O_S) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(M, ClVicBlk, M_E) {TagArrayRead, TagArrayWrite}  {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(M, ClVicBlkShared, M_S) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition({MO_I}, {RdBlk, RdBlkS, RdBlkM, CtoD}) {} {
+    r_requestToMem;
+    p_popRequestQueue;
+  }
+
+  transition(MO_I, {WrVicBlkShared, WrVicBlk, ClVicBlk, ClVicBlkShared}, MOD_I) {TagArrayWrite} {
+    f_setFrom;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(I_M, CPUData, M) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M, CPUDataShared, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_O, {CPUData, CPUDataShared}, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E, CPUData, E) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E, CPUDataShared, S) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(I_S, {CPUData, CPUDataShared}, S) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    pr_popResponseQueue;
+  }
+
+  transition(S_M, CPUDataShared, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(S_O, {CPUData, CPUDataShared}, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(S_E, CPUDataShared, S) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(S_S, {CPUData, CPUDataShared}, S) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(O_E, CPUDataShared, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition(O_S, {CPUData, CPUDataShared}, O) {DataArrayWrite, TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    d_writeData;
+    ut_updateTag;  // update tag on writeback hits.
+    pr_popResponseQueue;
+  }
+
+  transition({D_I}, {CPUData, CPUDataShared}, I) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(MOD_I, {CPUData, CPUDataShared}, MO_I) {TagArrayWrite} {
+    uu_sendUnblock;
+    rf_resetFrom;
+    pr_popResponseQueue;
+  }
+
+  transition(I_I, {CPUData, CPUDataShared}, MO_I) {TagArrayWrite, DataArrayRead} {
+    uu_sendUnblock;
+    wt_writeDataToTBE;
+    rf_resetFrom;
+    pr_popResponseQueue;
+  }
+
+  transition(I_CD, {CPUData, CPUDataShared}, I) {DataArrayRead, TagArrayWrite} {
+    uu_sendUnblock;
+    wt_writeDataToTBE;
+    wb_data;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition({M, O}, L3_Repl, MO_I) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    vd_vicDirty;
+    i_invL3;
+  }
+
+  transition({E, S,}, L3_Repl, I) {TagArrayRead, TagArrayWrite} {
+    i_invL3;
+  }
+
+  transition({I_M, I_O, S_M, S_O, E_M, E_O}, L3_Repl) {} {
+    zz_recycleRequestQueue;
+  }
+
+  transition({O_M, O_O, O_E, O_S, M_M, M_O, M_E, M_S}, L3_Repl) {} {
+    zz_recycleRequestQueue;
+  }
+
+  transition({I_E, I_S, S_E, S_S, E_E, E_S}, L3_Repl) {} {
+    zz_recycleRequestQueue;
+  }
+
+  transition({M, O}, PrbInvData, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    pd_sendProbeResponseData;
+    i_invL3;
+    pp_popProbeQueue;
+  }
+
+  transition({E, S, I}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    i_invL3;  // nothing will happen in I
+    pp_popProbeQueue;
+  }
+
+  transition({M, O, E, S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    i_invL3; // nothing will happen in I
+    pp_popProbeQueue;
+  }
+
+  transition({M, O}, PrbShrData, O) {TagArrayRead, DataArrayRead, TagArrayWrite} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({E, S}, PrbShrData, S) {TagArrayRead, TagArrayWrite} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition(I, PrbShrData) {TagArrayRead} {
+    pm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInvData, I_C) {TagArrayWrite, DataArrayRead} {
+    pdt_sendProbeResponseDataFromTBE;
+    mc_cancelMemWriteback;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInv, I_C) {TagArrayWrite} {
+    pi_sendProbeResponseInv;
+    mc_cancelMemWriteback;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbShrData) {DataArrayRead} {
+    pdt_sendProbeResponseDataFromTBE;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, {PrbInvData, PrbInv}) {} {
+    pi_sendProbeResponseInv;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbShrData) {} {
+    pm_sendProbeResponseMiss;
+    pp_popProbeQueue;
+  }
+
+  transition(I_I, {WBAck}, I_CD) {TagArrayWrite} {
+    pn_popNBResponseQueue;
+  }
+
+  transition(MOD_I, WBAck, D_I) {DataArrayRead} {
+    wb_data;
+    pn_popNBResponseQueue;
+  }
+
+  transition(MO_I, WBAck, I) {DataArrayRead, TagArrayWrite} {
+    wb_data;
+    dt_deallocateTBE;
+    pn_popNBResponseQueue;
+  }
+
+  transition(I_C, {WBAck}, I) {TagArrayWrite} {
+    dt_deallocateTBE;
+    pn_popNBResponseQueue;
+  }
+
+  transition({I_M, I_O, I_E, I_S}, CancelWB, I) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    i_invL3;
+    p_popRequestQueue;
+  }
+
+  transition({S_S, S_O, S_M, S_E}, CancelWB, S) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition({E_M, E_O, E_E, E_S}, CancelWB, E) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition({O_M, O_O, O_E, O_S}, CancelWB, O) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition({M_M, M_O, M_E, M_S}, CancelWB, M) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition(D_I, CancelWB, I) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition(MOD_I, CancelWB, MO_I) {TagArrayWrite} {
+    uu_sendUnblock;
+    rf_resetFrom;
+    p_popRequestQueue;
+  }
+
+  transition(I_I, CancelWB, I_C) {TagArrayWrite} {
+    uu_sendUnblock;
+    rf_resetFrom;
+    mc_cancelMemWriteback;
+    p_popRequestQueue;
+  }
+
+  transition(I_CD, CancelWB, I) {TagArrayWrite} {
+    uu_sendUnblock;
+    dt_deallocateTBE;
+    mc_cancelMemWriteback;
+    p_popRequestQueue;
+  }
+
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm b/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm
new file mode 100644
index 000000000..fd84447a2
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm
@@ -0,0 +1,3009 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:CorePair, "CP-like Core Coherence")
+ : Sequencer * sequencer;
+   Sequencer * sequencer1;
+   CacheMemory * L1Icache;
+   CacheMemory * L1D0cache;
+   CacheMemory * L1D1cache;
+   CacheMemory * L2cache;
+   int regionBufferNum;
+   bool send_evictions := "False";
+   Cycles issue_latency := 5;
+   Cycles l2_hit_latency := 18;
+
+  // BEGIN Core Buffers
+
+  // To the Network
+  MessageBuffer * requestFromCore, network="To", virtual_network="0", ordered="true", vnet_type="request";
+  MessageBuffer * responseFromCore, network="To", virtual_network="2", ordered="false", vnet_type="response";
+  MessageBuffer * unblockFromCore, network="To", virtual_network="4", ordered="false", vnet_type="unblock";
+
+  // From the Network
+  MessageBuffer * probeToCore, network="From", virtual_network="0", ordered="false", vnet_type="request";
+  MessageBuffer * responseToCore, network="From", virtual_network="2", ordered="false", vnet_type="response";
+
+  MessageBuffer * mandatoryQueue, ordered="false";
+  MessageBuffer * triggerQueue, ordered="true";
+
+  // END Core Buffers
+
+{
+  // BEGIN STATES
+  state_declaration(State, desc="Cache states", default="CorePair_State_I") {
+
+    I, AccessPermission:Invalid, desc="Invalid";
+    S, AccessPermission:Read_Only, desc="Shared";
+    E0, AccessPermission:Read_Write, desc="Exclusive with Cluster 0 ownership";
+    E1, AccessPermission:Read_Write, desc="Exclusive with Cluster 1 ownership";
+    Es, AccessPermission:Read_Write, desc="Exclusive in core";
+    O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
+    Ms, AccessPermission:Read_Write, desc="Modified in core, both clusters may be sharing line";
+    M0, AccessPermission:Read_Write, desc="Modified with cluster ownership";
+    M1, AccessPermission:Read_Write, desc="Modified with cluster ownership";
+
+    // Transient States
+    I_M0, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+    I_M1, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
+    I_M0M1, AccessPermission:Busy, desc="Was in I_M0, got a store request from other cluster as well";
+    I_M1M0, AccessPermission:Busy, desc="Was in I_M1, got a store request from other cluster as well";
+    I_M0Ms, AccessPermission:Busy, desc="Was in I_M0, got a load request from other cluster as well";
+    I_M1Ms, AccessPermission:Busy, desc="Was in I_M1, got a load request from other cluster as well";
+    I_E0S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+    I_E1S, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
+    I_ES, AccessPermission:Busy, desc="S_F got hit by invalidating probe, RdBlk response needs to go to both clusters";
+
+    IF_E0S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E0S but expecting a L2_to_L1D0 trigger, just drop when receive";
+    IF_E1S, AccessPermission:Busy, desc="something got hit with Probe Invalidate, now just I_E1S but expecting a L2_to_L1D1 trigger, just drop when receive";
+    IF_ES, AccessPermission:Busy, desc="same, but waiting for two fills";
+    IF0_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one";
+    IF1_ES, AccessPermission:Busy, desc="same, but waiting for two fills, got one";
+    F_S0, AccessPermission:Busy, desc="same, but going to S0 when trigger received";
+    F_S1, AccessPermission:Busy, desc="same, but going to S1 when trigger received";
+
+    ES_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for clean writeback ack";
+    MO_I, AccessPermission:Read_Only, desc="L2 replacement, waiting for dirty writeback ack";
+    MO_S0, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS";
+    MO_S1, AccessPermission:Read_Only, desc="M/O got Ifetch Miss, must write back first, then send RdBlkS";
+    S_F0, AccessPermission:Read_Only,  desc="Shared, filling L1";
+    S_F1, AccessPermission:Read_Only,  desc="Shared, filling L1";
+    S_F, AccessPermission:Read_Only,   desc="Shared, filling L1";
+    O_F0, AccessPermission:Read_Only,  desc="Owned, filling L1";
+    O_F1, AccessPermission:Read_Only,  desc="Owned, filling L1";
+    O_F,  AccessPermission:Read_Only,  desc="Owned, filling L1";
+    Si_F0, AccessPermission:Read_Only, desc="Shared, filling icache";
+    Si_F1, AccessPermission:Read_Only, desc="Shared, filling icache";
+    S_M0, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    S_M1, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    O_M0, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    O_M1, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
+    S0, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 0, waiting for response";
+    S1, AccessPermission:Busy, desc="RdBlkS on behalf of cluster 1, waiting for response";
+
+    Es_F0, AccessPermission:Read_Write, desc="Es, Cluster read, filling";
+    Es_F1, AccessPermission:Read_Write, desc="Es, Cluster read, filling";
+    Es_F, AccessPermission:Read_Write,  desc="Es, other cluster read, filling";
+    E0_F, AccessPermission:Read_Write, desc="E0, cluster read, filling";
+    E1_F, AccessPermission:Read_Write, desc="...";
+    E0_Es, AccessPermission:Read_Write, desc="...";
+    E1_Es, AccessPermission:Read_Write, desc="...";
+    Ms_F0, AccessPermission:Read_Write, desc="...";
+    Ms_F1, AccessPermission:Read_Write, desc="...";
+    Ms_F, AccessPermission:Read_Write,  desc="...";
+    M0_F, AccessPermission:Read_Write, desc="...";
+    M0_Ms, AccessPermission:Read_Write, desc="...";
+    M1_F, AccessPermission:Read_Write, desc="...";
+    M1_Ms, AccessPermission:Read_Write, desc="...";
+
+    I_C, AccessPermission:Invalid, desc="Invalid, but waiting for WBAck from NB from canceled writeback";
+    S0_C, AccessPermission:Busy, desc="MO_S0 hit by invalidating probe, waiting for WBAck form NB for canceled WB";
+    S1_C, AccessPermission:Busy, desc="MO_S1 hit by invalidating probe, waiting for WBAck form NB for canceled WB";
+    S_C, AccessPermission:Busy, desc="S*_C got NB_AckS, still waiting for WBAck";
+
+  } // END STATES
+
+  // BEGIN EVENTS
+  enumeration(Event, desc="CP Events") {
+    // CP Initiated events
+    C0_Load_L1miss,            desc="Cluster 0 load, L1 missed";
+    C0_Load_L1hit,             desc="Cluster 0 load, L1 hit";
+    C1_Load_L1miss,            desc="Cluster 1 load L1 missed";
+    C1_Load_L1hit,             desc="Cluster 1 load L1 hit";
+    Ifetch0_L1hit,             desc="Instruction fetch, hit in the L1";
+    Ifetch1_L1hit,             desc="Instruction fetch, hit in the L1";
+    Ifetch0_L1miss,            desc="Instruction fetch, missed in the L1";
+    Ifetch1_L1miss,            desc="Instruction fetch, missed in the L1";
+    C0_Store_L1miss,           desc="Cluster 0 store missed in L1";
+    C0_Store_L1hit,            desc="Cluster 0 store hit in L1";
+    C1_Store_L1miss,           desc="Cluster 1 store missed in L1";
+    C1_Store_L1hit,            desc="Cluster 1 store hit in L1";
+    // NB Initiated events
+    NB_AckS,             desc="NB Ack to Core Request";
+    NB_AckM,             desc="NB Ack to Core Request";
+    NB_AckE,             desc="NB Ack to Core Request";
+
+    NB_AckWB,            desc="NB Ack for writeback";
+
+    // Memory System initiatied events
+    L1I_Repl,           desc="Replace address from L1I"; // Presumed clean
+    L1D0_Repl,           desc="Replace address from L1D0"; // Presumed clean
+    L1D1_Repl,           desc="Replace address from L1D1"; // Presumed clean
+    L2_Repl,            desc="Replace address from L2";
+
+    L2_to_L1D0,           desc="L1 fill from L2";
+    L2_to_L1D1,           desc="L1 fill from L2";
+    L2_to_L1I,           desc="L1 fill from L2";
+
+    // Probe Events
+    PrbInvData,         desc="probe, return O or M data";
+    PrbInvDataDemand,     desc="probe, return O or M data. Demand request";
+    PrbInv,             desc="probe, no need for data";
+    PrbShrData,         desc="probe downgrade, return O or M data";
+    PrbShrDataDemand,     desc="probe downgrade, return O or M data. Demand request";
+    ForceRepl,          desc="probe from r-buf. Act as though a repl";
+    ForceDowngrade,     desc="probe from r-buf. Act as though a repl";
+
+  }  // END EVENTS
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    L1D0DataArrayRead,    desc="Read the data array";
+    L1D0DataArrayWrite,   desc="Write the data array";
+    L1D0TagArrayRead,     desc="Read the data array";
+    L1D0TagArrayWrite,    desc="Write the data array";
+    L1D1DataArrayRead,    desc="Read the data array";
+    L1D1DataArrayWrite,   desc="Write the data array";
+    L1D1TagArrayRead,     desc="Read the data array";
+    L1D1TagArrayWrite,    desc="Write the data array";
+    L1IDataArrayRead,     desc="Read the data array";
+    L1IDataArrayWrite,    desc="Write the data array";
+    L1ITagArrayRead,      desc="Read the data array";
+    L1ITagArrayWrite,     desc="Write the data array";
+    L2DataArrayRead,      desc="Read the data array";
+    L2DataArrayWrite,     desc="Write the data array";
+    L2TagArrayRead,       desc="Read the data array";
+    L2TagArrayWrite,      desc="Write the data array";
+  }
+
+
+  // BEGIN STRUCTURE DEFINITIONS
+
+
+  // Cache Entry
+  structure(Entry, desc="...", interface="AbstractCacheEntry") {
+    State CacheState,           desc="cache state";
+    bool Dirty,                 desc="Is the data dirty (diff than memory)?";
+    DataBlock DataBlk,          desc="data for the block";
+    bool FromL2, default="false", desc="block just moved from L2";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,             desc="Transient state";
+    DataBlock DataBlk,       desc="data for the block, required for concurrent writebacks";
+    bool Dirty,              desc="Is the data dirty (different than memory)?";
+    int NumPendingMsgs,      desc="Number of acks/data messages that this processor is waiting for";
+    bool Shared,             desc="Victim hit by shared probe";
+    bool AckNeeded,          desc="True if need to ack r-dir";
+   }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<CorePair_TBE>", constructor="m_number_of_TBEs";
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  // END STRUCTURE DEFINITIONS
+
+  // BEGIN INTERNAL FUNCTIONS
+
+  MachineID getPeer(MachineID mach) {
+    return createMachineID(MachineType:RegionBuffer, intToID(regionBufferNum));
+  }
+
+  bool addressInCore(Addr addr) {
+    return (L2cache.isTagPresent(addr) || L1Icache.isTagPresent(addr) || L1D0cache.isTagPresent(addr) || L1D1cache.isTagPresent(addr));
+  }
+
+  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
+    Entry L2cache_entry := static_cast(Entry, "pointer", L2cache.lookup(address));
+    return L2cache_entry;
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return tbe.DataBlk;
+    } else {
+      return getCacheEntry(addr).DataBlk;
+    }
+  }
+
+  Entry getL1CacheEntry(Addr addr, int cluster), return_by_pointer="yes" {
+    if (cluster == 0) {
+      Entry L1D0_entry := static_cast(Entry, "pointer", L1D0cache.lookup(addr));
+      return L1D0_entry;
+    } else {
+      Entry L1D1_entry := static_cast(Entry, "pointer", L1D1cache.lookup(addr));
+      return L1D1_entry;
+    }
+  }
+
+  Entry getICacheEntry(Addr addr), return_by_pointer="yes" {
+    Entry c_entry := static_cast(Entry, "pointer", L1Icache.lookup(addr));
+    return c_entry;
+  }
+
+  bool presentOrAvail2(Addr addr) {
+    return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
+  }
+
+  bool presentOrAvailI(Addr addr) {
+    return L1Icache.isTagPresent(addr) || L1Icache.cacheAvail(addr);
+  }
+
+  bool presentOrAvailD0(Addr addr) {
+    return L1D0cache.isTagPresent(addr) || L1D0cache.cacheAvail(addr);
+  }
+
+  bool presentOrAvailD1(Addr addr) {
+    return L1D1cache.isTagPresent(addr) || L1D1cache.cacheAvail(addr);
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if(is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+
+    if (is_valid(cache_entry)) {
+      cache_entry.CacheState := state;
+    }
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      return CorePair_State_to_permission(tbe.TBEState);
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return CorePair_State_to_permission(cache_entry.CacheState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  bool isValid(Addr addr) {
+      AccessPermission perm := getAccessPermission(addr);
+      if (perm == AccessPermission:NotPresent ||
+          perm == AccessPermission:Invalid ||
+          perm == AccessPermission:Busy) {
+          return false;
+      } else {
+          return true;
+      }
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(CorePair_State_to_permission(state));
+    }
+  }
+
+  MachineType testAndClearLocalHit(Entry cache_entry) {
+    assert(is_valid(cache_entry));
+    if (cache_entry.FromL2) {
+      cache_entry.FromL2 := false;
+      return MachineType:L2Cache;
+    } else {
+      return MachineType:L1Cache;
+    }
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L1D0DataArrayRead) {
+      L1D0cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L1D0DataArrayWrite) {
+      L1D0cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L1D0TagArrayRead) {
+      L1D0cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L1D0TagArrayWrite) {
+      L1D0cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:L1D1DataArrayRead) {
+      L1D1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L1D1DataArrayWrite) {
+      L1D1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L1D1TagArrayRead) {
+      L1D1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L1D1TagArrayWrite) {
+      L1D1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:L1IDataArrayRead) {
+      L1Icache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L1IDataArrayWrite) {
+      L1Icache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L1ITagArrayRead) {
+      L1Icache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L1ITagArrayWrite) {
+      L1Icache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:L2DataArrayRead) {
+      L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L2DataArrayWrite) {
+      L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L2TagArrayRead) {
+      L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L2TagArrayWrite) {
+      L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L2DataArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L2DataArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L2TagArrayRead) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L2TagArrayWrite) {
+      return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if  (request_type == RequestType:L1D0DataArrayRead) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if  (request_type == RequestType:L1D0DataArrayWrite) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D0TagArrayRead) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1D0TagArrayWrite) {
+      return L1D0cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1D1DataArrayRead) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D1DataArrayWrite) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1D1TagArrayRead) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1D1TagArrayWrite) {
+      return L1D1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1IDataArrayRead) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1IDataArrayWrite) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L1ITagArrayRead) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L1ITagArrayWrite) {
+      return L1Icache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      return true;
+    }
+  }
+
+  // END INTERNAL FUNCTIONS
+
+  // ** OUT_PORTS **
+
+  out_port(requestNetwork_out, CPURequestMsg, requestFromCore);
+  out_port(responseNetwork_out, ResponseMsg, responseFromCore);
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+  out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
+
+  // ** IN_PORTS **
+
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue, block_on="addr") {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == TriggerType:L2_to_L1) {
+          if (in_msg.Dest == CacheId:L1I) {
+            trigger(Event:L2_to_L1I, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Dest == CacheId:L1D0) {
+            trigger(Event:L2_to_L1D0, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Dest == CacheId:L1D1) {
+            trigger(Event:L2_to_L1D1, in_msg.addr, cache_entry, tbe);
+          } else {
+            error("unexpected trigger dest");
+          }
+        }
+      }
+    }
+  }
+
+
+  in_port(probeNetwork_in, NBProbeRequestMsg, probeToCore) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, NBProbeRequestMsg, block_on="addr") {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == ProbeRequestType:PrbInv) {
+          if (in_msg.DemandRequest) {
+            trigger(Event:PrbInvDataDemand, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.ReturnData) {
+            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+          if (in_msg.DemandRequest) {
+               trigger(Event:PrbShrDataDemand, in_msg.addr, cache_entry, tbe);
+          } else {
+               assert(in_msg.ReturnData);
+               trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == ProbeRequestType:PrbRepl) {
+          trigger(Event:ForceRepl, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == ProbeRequestType:PrbRegDowngrade) {
+          trigger(Event:ForceDowngrade, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unknown probe request");
+        }
+      }
+    }
+  }
+
+
+  // ResponseNetwork
+  in_port(responseToCore_in, ResponseMsg, responseToCore) {
+    if (responseToCore_in.isReady(clockEdge())) {
+      peek(responseToCore_in, ResponseMsg, block_on="addr") {
+
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+
+        if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+          if (in_msg.State == CoherenceState:Modified) {
+              trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.State == CoherenceState:Shared) {
+            trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.State == CoherenceState:Exclusive) {
+            trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+          trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unexpected Response Message to Core");
+        }
+      }
+    }
+  }
+
+  // Nothing from the Unblock Network
+
+  // Mandatory Queue
+  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+    if (mandatoryQueue_in.isReady(clockEdge())) {
+      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+
+        Entry cache_entry := getCacheEntry(in_msg.LineAddress);
+        TBE tbe := TBEs.lookup(in_msg.LineAddress);
+
+        if (in_msg.Type == RubyRequestType:IFETCH) {
+          // FETCH ACCESS
+
+          if (L1Icache.isTagPresent(in_msg.LineAddress)) {
+            if (mod(in_msg.contextId, 2) == 0) {
+              trigger(Event:Ifetch0_L1hit, in_msg.LineAddress, cache_entry, tbe);
+            } else {
+              trigger(Event:Ifetch1_L1hit, in_msg.LineAddress, cache_entry, tbe);
+            }
+          } else {
+            if (presentOrAvail2(in_msg.LineAddress)) {
+              if (presentOrAvailI(in_msg.LineAddress)) {
+                if (mod(in_msg.contextId, 2) == 0) {
+                  trigger(Event:Ifetch0_L1miss, in_msg.LineAddress, cache_entry,
+                          tbe);
+                } else {
+                  trigger(Event:Ifetch1_L1miss, in_msg.LineAddress, cache_entry,
+                          tbe);
+                }
+              } else {
+                Addr victim := L1Icache.cacheProbe(in_msg.LineAddress);
+                trigger(Event:L1I_Repl, victim,
+                        getCacheEntry(victim), TBEs.lookup(victim));
+              }
+            } else { // Not present or avail in L2
+              Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+              DPRINTF(RubySlicc, "Victim for %s L2_Repl(0) is %s\n", in_msg.LineAddress, victim);
+              trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+                      TBEs.lookup(victim));
+            }
+          }
+        } else {
+          // DATA ACCESS
+          if (mod(in_msg.contextId, 2) == 1) {
+            if (L1D1cache.isTagPresent(in_msg.LineAddress)) {
+              if (in_msg.Type == RubyRequestType:LD) {
+                trigger(Event:C1_Load_L1hit, in_msg.LineAddress, cache_entry,
+                        tbe);
+              } else {
+                // Stores must write through, make sure L2 avail.
+                if (presentOrAvail2(in_msg.LineAddress)) {
+                  trigger(Event:C1_Store_L1hit, in_msg.LineAddress, cache_entry,
+                          tbe);
+                } else {
+                  Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+              DPRINTF(RubySlicc, "Victim for %s L2_Repl(1) is %s\n", in_msg.LineAddress, victim);
+                  trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+                          TBEs.lookup(victim));
+                }
+              }
+            } else {
+              if (presentOrAvail2(in_msg.LineAddress)) {
+                if (presentOrAvailD1(in_msg.LineAddress)) {
+                  if (in_msg.Type == RubyRequestType:LD) {
+                    trigger(Event:C1_Load_L1miss, in_msg.LineAddress,
+                            cache_entry, tbe);
+                  } else {
+                    trigger(Event:C1_Store_L1miss, in_msg.LineAddress,
+                            cache_entry, tbe);
+                  }
+                } else {
+                  Addr victim := L1D1cache.cacheProbe(in_msg.LineAddress);
+              DPRINTF(RubySlicc, "Victim for %s L1D1_Repl is %s\n", in_msg.LineAddress, victim);
+                  trigger(Event:L1D1_Repl, victim,
+                          getCacheEntry(victim), TBEs.lookup(victim));
+                }
+              } else { // not present or avail in L2
+                Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+              DPRINTF(RubySlicc, "Victim for %s L2_Repl(2) is %s\n", in_msg.LineAddress, victim);
+                trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+              }
+            }
+          } else {
+            Entry L1D0cache_entry := getL1CacheEntry(in_msg.LineAddress, 0);
+            if (is_valid(L1D0cache_entry)) {
+              if (in_msg.Type == RubyRequestType:LD) {
+                trigger(Event:C0_Load_L1hit, in_msg.LineAddress, cache_entry,
+                    tbe);
+              } else {
+                if (presentOrAvail2(in_msg.LineAddress)) {
+                  trigger(Event:C0_Store_L1hit, in_msg.LineAddress, cache_entry,
+                      tbe);
+                } else {
+                  Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+              DPRINTF(RubySlicc, "Victim for %s L2_Repl(3) is %s\n", in_msg.LineAddress, victim);
+                  trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+                      TBEs.lookup(victim));
+                }
+              }
+            } else {
+              if (presentOrAvail2(in_msg.LineAddress)) {
+                if (presentOrAvailD0(in_msg.LineAddress)) {
+                  if (in_msg.Type == RubyRequestType:LD) {
+                    trigger(Event:C0_Load_L1miss, in_msg.LineAddress,
+                        cache_entry, tbe);
+                  } else {
+                    trigger(Event:C0_Store_L1miss, in_msg.LineAddress,
+                            cache_entry, tbe);
+                  }
+                } else {
+                  Addr victim := L1D0cache.cacheProbe(in_msg.LineAddress);
+              DPRINTF(RubySlicc, "Victim for %s L1D0_Repl is %s\n", in_msg.LineAddress, victim);
+                  trigger(Event:L1D0_Repl, victim, getCacheEntry(victim),
+                          TBEs.lookup(victim));
+                }
+              } else {
+                Addr victim := L2cache.cacheProbe(in_msg.LineAddress);
+              DPRINTF(RubySlicc, "Victim for %s L2_Repl(4) is %s\n", in_msg.LineAddress, victim);
+                trigger(Event:L2_Repl, victim, getCacheEntry(victim),
+                        TBEs.lookup(victim));
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+
+  // ACTIONS
+  action(ii_invIcache, "ii", desc="invalidate iCache") {
+    if (L1Icache.isTagPresent(address)) {
+      L1Icache.deallocate(address);
+    }
+  }
+
+  action(i0_invCluster, "i0", desc="invalidate cluster 0") {
+    if (L1D0cache.isTagPresent(address)) {
+      L1D0cache.deallocate(address);
+    }
+  }
+
+  action(i1_invCluster, "i1", desc="invalidate cluster 1") {
+    if (L1D1cache.isTagPresent(address)) {
+      L1D1cache.deallocate(address);
+    }
+  }
+
+  action(ib_invBothClusters, "ib", desc="invalidate both clusters") {
+    if (L1D0cache.isTagPresent(address)) {
+      L1D0cache.deallocate(address);
+    }
+    if (L1D1cache.isTagPresent(address)) {
+      L1D1cache.deallocate(address);
+    }
+  }
+
+  action(i2_invL2, "i2", desc="invalidate L2") {
+    if(is_valid(cache_entry)) {
+        L2cache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  action(n_issueRdBlk, "n", desc="Issue RdBlk") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlk;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkM;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(nMs_issueRdBlkMSinked, "nMs", desc="Issue RdBlkM with CtoDSinked") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkM;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.CtoDSinked := true;
+    }
+  }
+
+  action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkS;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := curCycle();
+    }
+  }
+
+  action(nSs_issueRdBlkSSinked, "nSs", desc="Issue RdBlkS with CtoDSinked") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceRequestType:RdBlkS;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.CtoDSinked := true;
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+    }
+  }
+
+  action(vd_victim, "vd", desc="Victimize M/O L2 Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      assert(is_valid(cache_entry));
+      out_msg.DataBlk := cache_entry.DataBlk;
+      assert(cache_entry.Dirty);
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicDirty;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:O) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+    }
+  }
+
+  action(vc_victim, "vc", desc="Victimize E/S L2 Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicClean;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:S) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+    }
+  }
+
+  // Could send these two directly to dir if we made a new out network on channel 0
+  action(vdf_victimForce, "vdf", desc="Victimize M/O L2 Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      assert(is_valid(cache_entry));
+      out_msg.DataBlk := cache_entry.DataBlk;
+      assert(cache_entry.Dirty);
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicDirty;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:O) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+      out_msg.Private := true;
+    }
+  }
+
+  action(vcf_victimForce, "vcf", desc="Victimize E/S L2 Data") {
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.Type := CoherenceRequestType:VicClean;
+      out_msg.InitialRequestTime := curCycle();
+      if (cache_entry.CacheState == State:S) {
+        out_msg.Shared := true;
+      } else {
+        out_msg.Shared := false;
+      }
+      out_msg.Private := true;
+    }
+  }
+
+  action(a0_allocateL1D, "a0", desc="Allocate L1D0 Block") {
+    if (L1D0cache.isTagPresent(address) == false) {
+      L1D0cache.allocateVoid(address, new Entry);
+    }
+  }
+
+  action(a1_allocateL1D, "a1", desc="Allocate L1D1 Block") {
+    if (L1D1cache.isTagPresent(address) == false) {
+      L1D1cache.allocateVoid(address, new Entry);
+    }
+  }
+
+  action(ai_allocateL1I, "ai", desc="Allocate L1I Block") {
+    if (L1Icache.isTagPresent(address) == false) {
+      L1Icache.allocateVoid(address, new Entry);
+    }
+  }
+
+  action(a2_allocateL2, "a2", desc="Allocate L2 Block") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(L2cache.allocate(address, new Entry));
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    assert(is_valid(cache_entry));
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+    tbe.DataBlk := cache_entry.DataBlk;  // Data only used for WBs
+    tbe.Dirty := cache_entry.Dirty;
+    tbe.Shared := false;
+  }
+
+  action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
+    mandatoryQueue_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
+    responseToCore_in.dequeue(clockEdge());
+  }
+
+  action(pt_popTriggerQueue, "pt", desc="Pop Trigger Queue") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  action(il0_loadDone, "il0", desc="Cluster 0 i load done") {
+    Entry entry := getICacheEntry(address);
+    Entry l2entry := getCacheEntry(address); // Used for functional accesses
+    assert(is_valid(entry));
+    // L2 supplies data (functional accesses only look in L2, ok because L1
+    //                   writes through to L2)
+    sequencer.readCallback(address,
+                           l2entry.DataBlk,
+                           true,
+                           testAndClearLocalHit(entry));
+  }
+
+  action(il1_loadDone, "il1", desc="Cluster 1 i load done") {
+    Entry entry := getICacheEntry(address);
+    Entry l2entry := getCacheEntry(address); // Used for functional accesses
+    assert(is_valid(entry));
+    // L2 supplies data (functional accesses only look in L2, ok because L1
+    //                   writes through to L2)
+    sequencer1.readCallback(address,
+                            l2entry.DataBlk,
+                            true,
+                            testAndClearLocalHit(entry));
+  }
+
+  action(l0_loadDone, "l0", desc="Cluster 0 load done") {
+    Entry entry := getL1CacheEntry(address, 0);
+    Entry l2entry := getCacheEntry(address); // Used for functional accesses
+    assert(is_valid(entry));
+    // L2 supplies data (functional accesses only look in L2, ok because L1
+    //                   writes through to L2)
+    sequencer.readCallback(address,
+                           l2entry.DataBlk,
+                           true,
+                           testAndClearLocalHit(entry));
+  }
+
+  action(l1_loadDone, "l1", desc="Cluster 1 load done") {
+    Entry entry := getL1CacheEntry(address, 1);
+    Entry l2entry := getCacheEntry(address); // Used for functional accesses
+    assert(is_valid(entry));
+    // L2 supplies data (functional accesses only look in L2, ok because L1
+    //                   writes through to L2)
+    sequencer1.readCallback(address,
+                            l2entry.DataBlk,
+                            true,
+                            testAndClearLocalHit(entry));
+  }
+
+  action(xl0_loadDone, "xl0", desc="Cluster 0 load done") {
+    peek(responseToCore_in, ResponseMsg) {
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+              (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      Entry l2entry := getCacheEntry(address); // Used for functional accesses
+      DPRINTF(ProtocolTrace, "CP Load Done 0 -- address %s, data: %s\n",
+              address, l2entry.DataBlk);
+      // L2 supplies data (functional accesses only look in L2, ok because L1
+      //                   writes through to L2)
+      assert(is_valid(l2entry));
+      sequencer.readCallback(address,
+                             l2entry.DataBlk,
+                             false,
+                             machineIDToMachineType(in_msg.Sender),
+                             in_msg.InitialRequestTime,
+                             in_msg.ForwardRequestTime,
+                             in_msg.ProbeRequestStartTime);
+    }
+  }
+
+  action(xl1_loadDone, "xl1", desc="Cluster 1 load done") {
+   peek(responseToCore_in, ResponseMsg) {
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+              (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      Entry l2entry := getCacheEntry(address); // Used for functional accesses
+      // L2 supplies data (functional accesses only look in L2, ok because L1
+      //                   writes through to L2)
+      assert(is_valid(l2entry));
+      sequencer1.readCallback(address,
+                              l2entry.DataBlk,
+                              false,
+                              machineIDToMachineType(in_msg.Sender),
+                              in_msg.InitialRequestTime,
+                              in_msg.ForwardRequestTime,
+                              in_msg.ProbeRequestStartTime);
+   }
+  }
+
+  action(xi0_loadDone, "xi0", desc="Cluster 0 i-load done") {
+    peek(responseToCore_in, ResponseMsg) {
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+              (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      Entry l2entry := getCacheEntry(address); // Used for functional accesses
+      // L2 supplies data (functional accesses only look in L2, ok because L1
+      //                   writes through to L2)
+      assert(is_valid(l2entry));
+      sequencer.readCallback(address,
+                             l2entry.DataBlk,
+                             false,
+                             machineIDToMachineType(in_msg.Sender),
+                             in_msg.InitialRequestTime,
+                             in_msg.ForwardRequestTime,
+                             in_msg.ProbeRequestStartTime);
+    }
+  }
+
+  action(xi1_loadDone, "xi1", desc="Cluster 1 i-load done") {
+    peek(responseToCore_in, ResponseMsg) {
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+              (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      Entry l2entry := getCacheEntry(address); // Used for functional accesses
+      // L2 supplies data (functional accesses only look in L2, ok because L1
+      //                   writes through to L2)
+      assert(is_valid(l2entry));
+      sequencer1.readCallback(address,
+                              l2entry.DataBlk,
+                              false,
+                              machineIDToMachineType(in_msg.Sender),
+                              in_msg.InitialRequestTime,
+                              in_msg.ForwardRequestTime,
+                              in_msg.ProbeRequestStartTime);
+    }
+  }
+
+  action(s0_storeDone, "s0", desc="Cluster 0 store done") {
+    Entry entry := getL1CacheEntry(address, 0);
+    assert(is_valid(entry));
+    assert(is_valid(cache_entry));
+    sequencer.writeCallback(address,
+                            cache_entry.DataBlk,
+                            true,
+                            testAndClearLocalHit(entry));
+    cache_entry.Dirty := true;
+    entry.DataBlk := cache_entry.DataBlk;
+    entry.Dirty := true;
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+  }
+
+  action(s1_storeDone, "s1", desc="Cluster 1 store done") {
+    Entry entry := getL1CacheEntry(address, 1);
+    assert(is_valid(entry));
+    assert(is_valid(cache_entry));
+    sequencer1.writeCallback(address,
+                             cache_entry.DataBlk,
+                             true,
+                             testAndClearLocalHit(entry));
+    cache_entry.Dirty := true;
+    entry.Dirty := true;
+    entry.DataBlk := cache_entry.DataBlk;
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+  }
+
+  action(xs0_storeDone, "xs0", desc="Cluster 0 store done") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getL1CacheEntry(address, 0);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+             (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      sequencer.writeCallback(address,
+                              cache_entry.DataBlk,
+                              false,
+                              machineIDToMachineType(in_msg.Sender),
+                              in_msg.InitialRequestTime,
+                              in_msg.ForwardRequestTime,
+                              in_msg.ProbeRequestStartTime);
+      cache_entry.Dirty := true;
+      entry.Dirty := true;
+      entry.DataBlk := cache_entry.DataBlk;
+      DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    }
+  }
+
+  action(xs1_storeDone, "xs1", desc="Cluster 1 store done") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getL1CacheEntry(address, 1);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      assert((machineIDToMachineType(in_msg.Sender) == MachineType:Directory) ||
+             (machineIDToMachineType(in_msg.Sender) == MachineType:L3Cache));
+      sequencer1.writeCallback(address,
+                               cache_entry.DataBlk,
+                               false,
+                               machineIDToMachineType(in_msg.Sender),
+                               in_msg.InitialRequestTime,
+                               in_msg.ForwardRequestTime,
+                               in_msg.ProbeRequestStartTime);
+      cache_entry.Dirty := true;
+      entry.Dirty := true;
+      entry.DataBlk := cache_entry.DataBlk;
+      DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    }
+  }
+
+  action(forward_eviction_to_cpu0, "fec0", desc="sends eviction information to processor0") {
+    if (send_evictions) {
+      DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
+      sequencer.evictionCallback(address);
+    }
+  }
+
+  action(forward_eviction_to_cpu1, "fec1", desc="sends eviction information to processor1") {
+    if (send_evictions) {
+      DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
+      sequencer1.evictionCallback(address);
+    }
+  }
+
+  action(ci_copyL2ToL1, "ci", desc="copy L2 data to L1") {
+    Entry entry := getICacheEntry(address);
+    assert(is_valid(entry));
+    assert(is_valid(cache_entry));
+    entry.Dirty := cache_entry.Dirty;
+    entry.DataBlk := cache_entry.DataBlk;
+    entry.FromL2 := true;
+  }
+
+  action(c0_copyL2ToL1, "c0", desc="copy L2 data to L1") {
+    Entry entry := getL1CacheEntry(address, 0);
+    assert(is_valid(entry));
+    assert(is_valid(cache_entry));
+    entry.Dirty := cache_entry.Dirty;
+    entry.DataBlk := cache_entry.DataBlk;
+    entry.FromL2 := true;
+  }
+
+  action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
+    peek(responseToCore_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:StaleNotif;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(c1_copyL2ToL1, "c1", desc="copy L2 data to L1") {
+    Entry entry := getL1CacheEntry(address, 1);
+    assert(is_valid(entry));
+    assert(is_valid(cache_entry));
+    entry.Dirty := cache_entry.Dirty;
+    entry.DataBlk := cache_entry.DataBlk;
+    entry.FromL2 := true;
+  }
+
+  action(fi_L2ToL1, "fi", desc="L2 to L1 inst fill") {
+    enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+      out_msg.addr := address;
+      out_msg.Type := TriggerType:L2_to_L1;
+      out_msg.Dest := CacheId:L1I;
+    }
+  }
+
+  action(f0_L2ToL1, "f0", desc="L2 to L1 data fill") {
+    enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+      out_msg.addr := address;
+      out_msg.Type := TriggerType:L2_to_L1;
+      out_msg.Dest := CacheId:L1D0;
+    }
+  }
+
+  action(f1_L2ToL1, "f1", desc="L2 to L1 data fill") {
+    enqueue(triggerQueue_out, TriggerMsg, l2_hit_latency) {
+      out_msg.addr := address;
+      out_msg.Type := TriggerType:L2_to_L1;
+      out_msg.Dest := CacheId:L1D1;
+    }
+  }
+
+  action(wi_writeIcache, "wi", desc="write data to icache (and l2)") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getICacheEntry(address);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      entry.DataBlk := in_msg.DataBlk;
+      entry.Dirty := in_msg.Dirty;
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  action(w0_writeDcache, "w0", desc="write data to dcache 0 (and l2)") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getL1CacheEntry(address, 0);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      entry.DataBlk := in_msg.DataBlk;
+      entry.Dirty := in_msg.Dirty;
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  action(w1_writeDcache, "w1", desc="write data to dcache 1 (and l2)") {
+    peek(responseToCore_in, ResponseMsg) {
+      Entry entry := getL1CacheEntry(address, 1);
+      assert(is_valid(entry));
+      assert(is_valid(cache_entry));
+      entry.DataBlk := in_msg.DataBlk;
+      entry.Dirty := in_msg.Dirty;
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.Dirty := in_msg.Dirty;
+    }
+  }
+
+  action(wb_data, "wb", desc="write back data") {
+    peek(responseToCore_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:CPUData;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(map_Address_to_Directory(address));
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.Dirty := tbe.Dirty;
+        if (tbe.Shared) {
+          out_msg.NbReqShared := true;
+        } else {
+          out_msg.NbReqShared := false;
+        }
+        out_msg.State := CoherenceState:Shared; // faux info
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.Dirty := false;
+      out_msg.Hit := false;
+      out_msg.Ntsl := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+      out_msg.isValid := isValid(address);
+    }
+  }
+
+  action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.Dirty := false;
+      out_msg.Ntsl := true;
+      out_msg.Hit := false;
+      APPEND_TRANSITION_COMMENT("Setting Ms");
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+      out_msg.isValid := isValid(address);
+    }
+  }
+
+  action(ph_sendProbeResponseHit, "ph", desc="send probe ack PrbShrData, no data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      assert(addressInCore(address) || is_valid(tbe));
+      out_msg.Dirty := false;  // only true if sending back data i think
+      out_msg.Hit := true;
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+      out_msg.isValid := isValid(address);
+    }
+  }
+
+  action(pb_sendProbeResponseBackprobe, "pb", desc="send probe ack PrbShrData, no data, check for L1 residence") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      if (addressInCore(address)) {
+        out_msg.Hit := true;
+      } else {
+        out_msg.Hit := false;
+      }
+      out_msg.Dirty := false;  // not sending back data, so def. not dirty
+      out_msg.Ntsl := false;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+      out_msg.isValid := isValid(address);
+    }
+  }
+
+  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      assert(is_valid(cache_entry));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.DataBlk := cache_entry.DataBlk;
+      assert(cache_entry.Dirty);
+      out_msg.Dirty := true;
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.isValid := isValid(address);
+    }
+  }
+
+  action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      assert(is_valid(cache_entry));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+      out_msg.DataBlk := cache_entry.DataBlk;
+      assert(cache_entry.Dirty);
+      out_msg.Dirty := true;
+      out_msg.Hit := true;
+      APPEND_TRANSITION_COMMENT("Setting Ms");
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.isValid := isValid(address);
+    }
+  }
+
+  action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      assert(is_valid(tbe));
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.DataBlk := tbe.DataBlk;
+      assert(tbe.Dirty);
+      out_msg.Dirty := true;
+      out_msg.Hit := true;
+      out_msg.State := CoherenceState:NA;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.isValid := isValid(address);
+    }
+  }
+
+  action(ra_sendReplAck, "ra", desc="Send ack to r-buf that line is replaced if needed") {
+    if (is_invalid(tbe) || tbe.AckNeeded) {
+      enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceRequestType:InvAck;
+        out_msg.Requestor := machineID;
+        out_msg.Destination.add(getPeer(machineID));
+        out_msg.MessageSize := MessageSizeType:Request_Control;
+      }
+      APPEND_TRANSITION_COMMENT(" Sending ack to r-buf ");
+    } else {
+      APPEND_TRANSITION_COMMENT(" NOT Sending ack to r-buf ");
+    }
+  }
+
+  action(m_markAckNeeded, "m", desc="Mark TBE to send ack when deallocated") {
+    assert(is_valid(tbe));
+    tbe.AckNeeded := true;
+  }
+
+  action(mc_cancelWB, "mc", desc="send writeback cancel to L3") {
+    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:CPUCancelWB;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Sender := machineID;
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(s_setSharedFlip, "s", desc="hit by shared probe, status may be different") {
+    assert(is_valid(tbe));
+    tbe.Shared := true;
+  }
+
+  action(uu_sendUnblock, "uu", desc="state changed, unblock") {
+    enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      out_msg.wasValid := isValid(address);
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(sdv_sendDoneValid, "sdv", desc="Request finished, send done ack") {
+    enqueue(unblockNetwork_out, UnblockMsg, 1) {
+      out_msg.addr := address;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.DoneAck := true;
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      if (is_valid(tbe)) {
+          out_msg.Dirty := tbe.Dirty;
+      } else if (is_valid(cache_entry)) {
+          out_msg.Dirty := cache_entry.Dirty;
+      } else {
+          out_msg.Dirty := false;
+      }
+      out_msg.validToInvalid := false;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(sdi_sendDoneInvalid, "sdi", desc="Request finished, send done ack") {
+    enqueue(unblockNetwork_out, UnblockMsg, 1) {
+      out_msg.addr := address;
+      out_msg.Destination.add(getPeer(machineID));
+      out_msg.DoneAck := true;
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      if (is_valid(tbe)) {
+          out_msg.Dirty := tbe.Dirty;
+      } else if (is_valid(cache_entry)) {
+          out_msg.Dirty := cache_entry.Dirty;
+      } else {
+          out_msg.Dirty := false;
+      }
+      out_msg.validToInvalid := true;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(l10m_profileMiss, "l10m", desc="l10m miss profile") {
+    ++L1D0cache.demand_misses;
+  }
+
+  action(l11m_profileMiss, "l11m", desc="l11m miss profile") {
+    ++L1D1cache.demand_misses;
+  }
+
+  action(l1im_profileMiss, "l1lm", desc="l1im miss profile") {
+    ++L1Icache.demand_misses;
+  }
+
+  action(l2m_profileMiss, "l2m", desc="l2m miss profile") {
+    ++L2cache.demand_misses;
+  }
+
+  action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
+    probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
+    mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+  // END ACTIONS
+
+  // BEGIN TRANSITIONS
+
+  // transitions from base
+  transition(I, C0_Load_L1miss, I_E0S) {L1D0TagArrayRead, L2TagArrayRead} {
+    // track misses, if implemented
+    // since in I state, L2 miss as well
+    l2m_profileMiss;
+    l10m_profileMiss;
+    a0_allocateL1D;
+    l1im_profileMiss;
+    a2_allocateL2;
+    i1_invCluster;
+    ii_invIcache;
+    n_issueRdBlk;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, C1_Load_L1miss, I_E1S) {L1D1TagArrayRead, L2TagArrayRead} {
+    // track misses, if implemented
+    // since in I state, L2 miss as well
+    l2m_profileMiss;
+    l11m_profileMiss;
+    a1_allocateL1D;
+    a2_allocateL2;
+    i0_invCluster;
+    ii_invIcache;
+    n_issueRdBlk;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, Ifetch0_L1miss, S0) {L1ITagArrayRead, L2TagArrayRead} {
+    // track misses, if implemented
+    // L2 miss as well
+    l10m_profileMiss;
+    l2m_profileMiss;
+    l1im_profileMiss;
+    ai_allocateL1I;
+    a2_allocateL2;
+    ib_invBothClusters;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} {
+     l11m_profileMiss;
+    // track misses, if implemented
+    // L2 miss as well
+    l2m_profileMiss;
+    l1im_profileMiss;
+    ai_allocateL1I;
+    a2_allocateL2;
+    ib_invBothClusters;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, C0_Store_L1miss, I_M0) {L1D0TagArrayRead,L2TagArrayRead} {
+    l2m_profileMiss;
+    l10m_profileMiss;
+    a0_allocateL1D;
+    a2_allocateL2;
+    i1_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition(I, C1_Store_L1miss, I_M1) {L1D0TagArrayRead, L2TagArrayRead} {
+    l2m_profileMiss;
+    l11m_profileMiss;
+    a1_allocateL1D;
+    a2_allocateL2;
+    i0_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition(S, C0_Load_L1miss, S_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(S, C1_Load_L1miss, S_F1) {L1D1TagArrayRead,  L2TagArrayRead, L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(S, Ifetch0_L1miss, Si_F0) {L1ITagArrayRead,L2TagArrayRead, L2DataArrayRead} {
+    l1im_profileMiss;
+    ai_allocateL1I;
+    fi_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(S, Ifetch1_L1miss, Si_F1) {L1ITagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+    l1im_profileMiss;
+    ai_allocateL1I;
+    fi_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition({S}, {C0_Store_L1hit, C0_Store_L1miss}, S_M0) {L1D0TagArrayRead, L2TagArrayRead}{
+    l2m_profileMiss;
+    l10m_profileMiss;
+    a0_allocateL1D;
+    i1_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition({S}, {C1_Store_L1hit, C1_Store_L1miss}, S_M1) {L1D1TagArrayRead,L2TagArrayRead} {
+    l2m_profileMiss;
+    l11m_profileMiss;
+    a1_allocateL1D;
+    i0_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+  transition(Es, C0_Load_L1miss, Es_F0) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {  // can this be folded with S_F?
+     l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(Es, C1_Load_L1miss, Es_F1) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {  // can this be folded with S_F?
+     l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(Es, Ifetch0_L1miss, S0) {L1ITagArrayRead, L2TagArrayRead} {
+      l1im_profileMiss;
+     i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    ib_invBothClusters;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(Es, Ifetch1_L1miss, S1) {L1ITagArrayRead, L2TagArrayRead} {
+     l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    ib_invBothClusters;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  // THES SHOULD NOT BE INSTANTANEOUS BUT OH WELL FOR NOW
+  transition(Es, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayWrite,L1D0TagArrayRead, L2TagArrayRead, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} {
+    a0_allocateL1D;
+    i1_invCluster;
+    s0_storeDone;   // instantaneous L1/L2 dirty - no writethrough delay
+    p_popMandatoryQueue;
+  }
+
+  transition(Es, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} {
+    a1_allocateL1D;
+    i0_invCluster;
+    s1_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, C0_Load_L1miss, E0_F) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+     l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, C1_Load_L1miss, E0_Es) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+     l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, Ifetch0_L1miss, S0) {L2TagArrayRead, L1ITagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkS
+    l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    i0_invCluster;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead } {
+    l2m_profileMiss; // permissions miss, still issue RdBlkS
+    l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    i0_invCluster;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+    a0_allocateL1D;
+    s0_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(E0, C1_Store_L1miss, M1) {L1D0TagArrayRead, L1D0TagArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+    a1_allocateL1D;
+    l11m_profileMiss;
+    i0_invCluster;
+    s1_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, C1_Load_L1miss, E1_F) {L1D1TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+    a1_allocateL1D;
+    l11m_profileMiss;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, C0_Load_L1miss, E1_Es) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+    a0_allocateL1D;
+    l10m_profileMiss;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, Ifetch1_L1miss, S1) {L2TagArrayRead, L1ITagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkS
+    l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    i1_invCluster;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, Ifetch0_L1miss, S0) {L2TagArrayRead,L1ITagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkS
+    l1im_profileMiss;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    i1_invCluster;
+    nS_issueRdBlkS;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+    a1_allocateL1D;
+    s1_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(E1, C0_Store_L1miss, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L2TagArrayRead, L2TagArrayWrite, L2DataArrayWrite} {
+     l10m_profileMiss;
+    a0_allocateL1D;
+    i1_invCluster;
+    s0_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition({O}, {C0_Store_L1hit, C0_Store_L1miss}, O_M0) {L1D0TagArrayRead, L2TagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue CtoD
+     l10m_profileMiss;
+    a0_allocateL1D;
+    i1_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition({O}, {C1_Store_L1hit, C1_Store_L1miss}, O_M1) {L1D1TagArrayRead, L2TagArrayRead} {
+    l2m_profileMiss; // permissions miss, still issue RdBlkS
+    l11m_profileMiss;
+    a1_allocateL1D;
+    i0_invCluster;
+    ii_invIcache;
+    nM_issueRdBlkM;
+    p_popMandatoryQueue;
+  }
+
+  transition(O, C0_Load_L1miss, O_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(O, C1_Load_L1miss, O_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(Ms, C0_Load_L1miss, Ms_F0) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(Ms, C1_Load_L1miss, Ms_F1) {L2TagArrayRead, L2DataArrayRead, L1D1TagArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition({Ms, M0, M1, O}, Ifetch0_L1miss, MO_S0) {L1ITagArrayRead,  L2TagArrayRead} {
+    l2m_profileMiss;  // permissions miss
+    l1im_profileMiss;
+    ai_allocateL1I;
+    t_allocateTBE;
+    ib_invBothClusters;
+    vd_victim;
+//    i2_invL2;
+    p_popMandatoryQueue;
+  }
+
+  transition({Ms, M0, M1, O}, Ifetch1_L1miss, MO_S1) {L1ITagArrayRead L2TagArrayRead } {
+    l2m_profileMiss;  // permissions miss
+    l10m_profileMiss;
+    ai_allocateL1I;
+    t_allocateTBE;
+    ib_invBothClusters;
+    vd_victim;
+//    i2_invL2;
+    p_popMandatoryQueue;
+  }
+
+  transition(Ms, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+    a0_allocateL1D;
+    i1_invCluster;
+    s0_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(Ms, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D1TagArrayRead, L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+    a1_allocateL1D;
+    i0_invCluster;
+    s1_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(M0, C0_Load_L1miss, M0_F) {L1D0TagArrayRead, L2TagArrayRead, L2DataArrayRead} {
+   l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(M0, C1_Load_L1miss, M0_Ms) {L2TagArrayRead, L2DataArrayRead,L1D1TagArrayRead} {
+   l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(M0, {C0_Store_L1hit, C0_Store_L1miss}) {L1D0TagArrayRead, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayRead} {
+    a0_allocateL1D;
+    s0_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(M0, {C1_Store_L1hit, C1_Store_L1miss}, M1) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayRead, L2TagArrayWrite} {
+    a1_allocateL1D;
+    i0_invCluster;
+    s1_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(M1, C0_Load_L1miss, M1_Ms) {L2TagArrayRead, L2DataArrayRead, L1D0TagArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(M1, C1_Load_L1miss, M1_F) {L1D1TagArrayRead L2TagArrayRead, L2DataArrayRead} {
+   l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(M1, {C0_Store_L1hit, C0_Store_L1miss}, M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayRead, L2DataArrayWrite, L2TagArrayWrite} {
+    a0_allocateL1D;
+    i1_invCluster;
+    s0_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  transition(M1, {C1_Store_L1hit, C1_Store_L1miss}) {L1D1TagArrayRead, L1D1DataArrayWrite, L2TagArrayRead, L2DataArrayWrite} {
+    a1_allocateL1D;
+    s1_storeDone;
+    p_popMandatoryQueue;
+  }
+
+  // end transitions from base
+
+  // Begin simple hit transitions
+  transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es,
+          Ms_F1, M0_Ms}, C0_Load_L1hit) {L1D0TagArrayRead, L1D0DataArrayRead} {
+    // track hits, if implemented
+    l0_loadDone;
+    p_popMandatoryQueue;
+  }
+
+  transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es,
+          Ms_F0, M1_Ms}, C1_Load_L1hit) {L1D1TagArrayRead, L1D1DataArrayRead} {
+    // track hits, if implemented
+    l1_loadDone;
+    p_popMandatoryQueue;
+  }
+
+  transition({S, S_C, S_F0, S_F1, S_F}, Ifetch0_L1hit) {L1ITagArrayRead, L1IDataArrayRead} {
+    // track hits, if implemented
+    il0_loadDone;
+    p_popMandatoryQueue;
+  }
+
+  transition({S, S_C, S_F0, S_F1, S_F}, Ifetch1_L1hit) {L1ITagArrayRead, L1IDataArrayWrite} {
+    // track hits, if implemented
+    il1_loadDone;
+    p_popMandatoryQueue;
+  }
+
+  // end simple hit transitions
+
+  // Transitions from transient states
+
+  // recycles
+  transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+          IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F,
+          E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, C0_Load_L1hit) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M1,
+          O_M1, S0, S1, I_C, S0_C, S1_C, S_C}, C0_Load_L1miss) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+          IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F,
+          E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, C1_Load_L1hit) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, Si_F0, Si_F1, S_M0,
+          O_M0, S0, S1, I_C, S0_C, S1_C, S_C},  C1_Load_L1miss) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, {Ifetch0_L1hit, Ifetch1_L1hit}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M0, I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES,
+          IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, ES_I, MO_I, S_F0, S_F1, S_F,
+          O_F0, O_F1, O_F, S_M0, S_M1, O_M0, O_M1, Es_F0, Es_F1, Es_F, E0_F,
+          E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_F, M1_Ms, I_C,
+          S_C}, {Ifetch0_L1miss, Ifetch1_L1miss}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_E1S, IF_E1S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1, S_F1, O_F1,
+          Si_F0, Si_F1, S_M1, O_M1, S0, S1, Es_F1, E1_F, E0_Es, Ms_F1, M0_Ms,
+          M1_F, I_C, S0_C, S1_C, S_C}, {C0_Store_L1miss}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_E0S, IF_E0S, F_S0, F_S1, ES_I, MO_I, MO_S0, MO_S1 S_F0, O_F0,
+          Si_F0, Si_F1, S_M0, O_M0, S0, S1, Es_F0, E0_F, E1_Es, Ms_F0, M0_F,
+          M1_Ms, I_C, S0_C, S1_C, S_C}, {C1_Store_L1miss}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+          IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M0, O_M0, Es_F0, Es_F1, Es_F, E0_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_Ms}, {C0_Store_L1hit}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+          IF0_ES, IF1_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, Si_F0, Si_F1, S_M1,
+          O_M1, Es_F0, Es_F1, Es_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F,
+          M0_Ms, M1_F, M1_Ms}, {C1_Store_L1hit}) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M0, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_ES, IF_E0S, IF_ES,
+          IF0_ES, IF1_ES, S_F0, S_F, O_F0, O_F, S_M0, O_M0, Es_F0, Es_F, E0_F,
+          E1_Es, Ms_F0, Ms_F, M0_F, M1_Ms}, L1D0_Repl) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E1S, I_ES, IF_E1S, IF_ES,
+          IF0_ES, IF1_ES, S_F1, S_F, O_F1, O_F, S_M1, O_M1, Es_F1, Es_F, E1_F,
+          E0_Es, Ms_F1, Ms_F, M0_Ms, M1_F}, L1D1_Repl) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({F_S0, F_S1, MO_S0, MO_S1, Si_F0, Si_F1, S0, S1, S0_C, S1_C}, L1I_Repl) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({S_C, S0_C, S1_C, S0, S1, Si_F0, Si_F1, I_M0, I_M1, I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_E0S, I_E1S, I_ES, S_F0, S_F1, S_F, O_F0, O_F1, O_F, S_M0, O_M0, S_M1, O_M1, Es_F0, Es_F1, Es_F, E0_F, E1_F, E0_Es, E1_Es, Ms_F0, Ms_F1, Ms_F, M0_F, M0_Ms, M1_F, M1_Ms, MO_S0, MO_S1, IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, L2_Repl) {} {
+    zz_recycleMandatoryQueue;
+  }
+
+  transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES, F_S0, F_S1}, {NB_AckS,
+          PrbInvData, PrbInvDataDemand, PrbInv, PrbShrData, PrbShrDataDemand}) {} {
+    zz_recycleMandatoryQueue;  // these should be resolved soon, but I didn't want to add more states, though technically they could be solved now, and probes really could be solved but i don't think it's really necessary.
+  }
+
+  transition({IF_E0S, IF_E1S, IF_ES, IF0_ES, IF1_ES}, NB_AckE) {} {
+    zz_recycleMandatoryQueue;  // these should be resolved soon, but I didn't want to add more states, though technically they could be solved now, and probes really could be solved but i don't think it's really necessary.
+  }
+
+  transition({E0_Es, E1_F, Es_F1}, C0_Load_L1miss, Es_F) {L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(S_F1, C0_Load_L1miss, S_F) {L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(O_F1, C0_Load_L1miss, O_F) {L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition({Ms_F1, M0_Ms, M1_F}, C0_Load_L1miss, Ms_F) {L2DataArrayRead} {
+    l10m_profileMiss;
+    a0_allocateL1D;
+    f0_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_M0, C1_Load_L1miss, I_M0Ms){
+    l11m_profileMiss;
+    l2m_profileMiss;
+    a1_allocateL1D;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_M1, C0_Load_L1miss, I_M1Ms){
+    l10m_profileMiss;
+    l2m_profileMiss;
+    a0_allocateL1D;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_M0, C1_Store_L1miss, I_M0M1) {
+    l11m_profileMiss;
+    l2m_profileMiss;
+    a1_allocateL1D;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_M1, C0_Store_L1miss, I_M1M0) {L1D0TagArrayRead, L1D0TagArrayWrite, L2TagArrayRead, L2TagArrayWrite} {
+    l2m_profileMiss;
+    a0_allocateL1D;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_E0S, C1_Load_L1miss, I_ES) {} {
+    l2m_profileMiss;
+    l11m_profileMiss;
+    a1_allocateL1D;
+    p_popMandatoryQueue;
+  }
+
+  transition(I_E1S, C0_Load_L1miss, I_ES) {} {
+    l2m_profileMiss;
+    l10m_profileMiss;
+    l2m_profileMiss;
+    a0_allocateL1D;
+    p_popMandatoryQueue;
+  }
+
+  transition({E1_Es, E0_F, Es_F0}, C1_Load_L1miss, Es_F) {L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(S_F0, C1_Load_L1miss, S_F) { L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition(O_F0, C1_Load_L1miss, O_F) {L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition({Ms_F0, M1_Ms, M0_F}, C1_Load_L1miss, Ms_F) {L2DataArrayRead} {
+    l11m_profileMiss;
+    a1_allocateL1D;
+    f1_L2ToL1;
+    p_popMandatoryQueue;
+  }
+
+  transition({S, Es, E0, O, Ms, M0, O_F1, S_F1, Si_F0, Si_F1, Es_F1, E0_Es, Ms_F1, M0_Ms}, L1D0_Repl) {L1D0TagArrayRead} {
+    i0_invCluster;
+  }
+
+  transition({S, Es, E1, O, Ms, M1, O_F0, S_F0, Si_F0, Si_F1, Es_F0, E1_Es, Ms_F0, M1_Ms}, L1D1_Repl) {L1D1TagArrayRead} {
+    i1_invCluster;
+  }
+
+  transition({S, S_C, S_F0, S_F1}, L1I_Repl) {L1ITagArrayRead} {
+    ii_invIcache;
+  }
+
+  transition({S, E0, E1, Es}, L2_Repl, ES_I) {L2TagArrayRead,L1D0TagArrayRead, L1D1TagArrayRead, L1ITagArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    t_allocateTBE;
+    vc_victim;
+    ib_invBothClusters;
+    i2_invL2;
+    ii_invIcache;
+  }
+
+  transition({Ms, M0, M1, O}, L2_Repl, MO_I) {L2TagArrayRead, L2TagArrayWrite, L1D0TagArrayRead, L1D1TagArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    t_allocateTBE;
+    vd_victim;
+    i2_invL2;
+    ib_invBothClusters;  // nothing will happen for D0 on M1, vice versa
+  }
+
+  transition(S0, NB_AckS, S) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    wi_writeIcache;
+    xi0_loadDone;
+    uu_sendUnblock;
+    sdv_sendDoneValid;
+    pr_popResponseQueue;
+  }
+
+  transition(S1, NB_AckS, S) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    wi_writeIcache;
+    xi1_loadDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(S0_C, NB_AckS, S_C) { L1IDataArrayWrite,L2DataArrayWrite} {
+    // does not need send done since the rdblks was "sinked"
+    wi_writeIcache;
+    xi0_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(S1_C, NB_AckS, S_C) { L1D1DataArrayWrite,L2DataArrayWrite} {
+    wi_writeIcache;
+    xi1_loadDone;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M0, NB_AckM, M0) { L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    w0_writeDcache;
+    xs0_storeDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} {
+    w1_writeDcache;
+    xs1_storeDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  // THESE MO->M1 should not be instantaneous but oh well for now.
+  transition(I_M0M1, NB_AckM, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} {
+    w0_writeDcache;
+    xs0_storeDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    i0_invCluster;
+    s1_storeDone;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M1M0, NB_AckM, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite,L2DataArrayWrite, L2TagArrayWrite} {
+    w1_writeDcache;
+    xs1_storeDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    i1_invCluster;
+    s0_storeDone;
+    pr_popResponseQueue;
+  }
+
+  // Above shoudl be more like this, which has some latency to xfer to L1
+  transition(I_M0Ms, NB_AckM, M0_Ms) {L1D0DataArrayWrite,L2DataArrayWrite} {
+    w0_writeDcache;
+    xs0_storeDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    f1_L2ToL1;
+    pr_popResponseQueue;
+  }
+
+  transition(I_M1Ms, NB_AckM, M1_Ms) {L1D1DataArrayWrite,L2DataArrayWrite} {
+    w1_writeDcache;
+    xs1_storeDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    f0_L2ToL1;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E0S, NB_AckE, E0) {L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    w0_writeDcache;
+    xl0_loadDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E1S, NB_AckE, E1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    w1_writeDcache;
+    xl1_loadDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_ES, NB_AckE, Es) {L1D1DataArrayWrite, L1D1TagArrayWrite, L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite } {
+    w0_writeDcache;
+    xl0_loadDone;
+    w1_writeDcache;
+    xl1_loadDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E0S, NB_AckS, S) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    w0_writeDcache;
+    xl0_loadDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_E1S, NB_AckS, S) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayWrite} {
+    w1_writeDcache;
+    xl1_loadDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(I_ES, NB_AckS, S) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite,  L2DataArrayWrite} {
+    w0_writeDcache;
+    xl0_loadDone;
+    w1_writeDcache;
+    xl1_loadDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(S_F0, L2_to_L1D0, S) {L1D0TagArrayWrite, L1D0DataArrayWrite,  L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(S_F1, L2_to_L1D1, S) {L1D1TagArrayWrite, L1D1DataArrayWrite,  L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Si_F0, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite,  L2TagArrayWrite, L2DataArrayRead} {
+    ci_copyL2ToL1;
+    il0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Si_F1, L2_to_L1I, S) {L1ITagArrayWrite, L1IDataArrayWrite,  L2TagArrayWrite, L2DataArrayRead} {
+    ci_copyL2ToL1;
+    il1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(S_F, L2_to_L1D0, S_F1) { L1D0DataArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(S_F, L2_to_L1D1, S_F0) { L1D1DataArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(O_F0, L2_to_L1D0, O) { L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(O_F1, L2_to_L1D1, O) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(O_F, L2_to_L1D0, O_F1) { L1D0DataArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(O_F, L2_to_L1D1, O_F0) { L1D1DataArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(M1_F, L2_to_L1D1, M1) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(M0_F, L2_to_L1D0, M0) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Ms_F0, L2_to_L1D0, Ms) {L1D0DataArrayWrite, L1D0TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Ms_F1, L2_to_L1D1, Ms) {L1D1DataArrayWrite, L1D1TagArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Ms_F, L2_to_L1D0, Ms_F1) {L1D0DataArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Ms_F, L2_to_L1D1, Ms_F0) {L1IDataArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(M1_Ms, L2_to_L1D0, Ms) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(M0_Ms, L2_to_L1D1, Ms) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Es_F0, L2_to_L1D0, Es) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Es_F1, L2_to_L1D1, Es) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2TagArrayWrite, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Es_F, L2_to_L1D0, Es_F1) {L2TagArrayRead, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(Es_F, L2_to_L1D1, Es_F0) {L2TagArrayRead, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(E0_F, L2_to_L1D0, E0) {L2TagArrayRead, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(E1_F, L2_to_L1D1, E1) {L2TagArrayRead, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(E1_Es, L2_to_L1D0, Es) {L2TagArrayRead, L2DataArrayRead} {
+    c0_copyL2ToL1;
+    l0_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(E0_Es, L2_to_L1D1, Es) {L2TagArrayRead, L2DataArrayRead} {
+    c1_copyL2ToL1;
+    l1_loadDone;
+    pt_popTriggerQueue;
+  }
+
+  transition(IF_E0S, L2_to_L1D0, I_E0S) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(IF_E1S, L2_to_L1D1, I_E1S) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(IF_ES, L2_to_L1D0, IF1_ES) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(IF_ES, L2_to_L1D1, IF0_ES) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(IF0_ES, L2_to_L1D0, I_ES) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(IF1_ES, L2_to_L1D1, I_ES) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(F_S0, L2_to_L1I, S0) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(F_S1, L2_to_L1I, S1) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition({S_M0, O_M0}, NB_AckM, M0) {L1D0TagArrayWrite, L1D0DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    xs0_storeDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition({S_M1, O_M1}, NB_AckM, M1) {L1D1TagArrayWrite, L1D1DataArrayWrite, L2DataArrayWrite, L2TagArrayWrite} {
+    xs1_storeDone;
+    sdv_sendDoneValid;
+    uu_sendUnblock;
+    pr_popResponseQueue;
+  }
+
+  transition(MO_I, NB_AckWB, I) {L2TagArrayWrite} {
+    wb_data;
+    ra_sendReplAck;
+    sdi_sendDoneInvalid;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(ES_I, NB_AckWB, I) {L2TagArrayWrite} {
+    wb_data;
+    ra_sendReplAck;
+    sdi_sendDoneInvalid;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(MO_S0, NB_AckWB, S0) {L2TagArrayWrite} {
+    wb_data;
+    i2_invL2;
+    a2_allocateL2;
+    sdv_sendDoneValid;
+    nS_issueRdBlkS;
+    d_deallocateTBE; // FOO
+    pr_popResponseQueue;
+  }
+
+  transition(MO_S1, NB_AckWB, S1) {L2TagArrayWrite} {
+    wb_data;
+    i2_invL2;
+    a2_allocateL2;
+    sdv_sendDoneValid;
+    nS_issueRdBlkS;
+    d_deallocateTBE; // FOO
+    pr_popResponseQueue;
+  }
+
+  // Writeback cancel "ack"
+  transition(I_C, NB_AckWB, I) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    sdi_sendDoneInvalid;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(S0_C, NB_AckWB, S0) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    sdv_sendDoneValid;
+    pr_popResponseQueue;
+  }
+
+  transition(S1_C, NB_AckWB, S1) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    sdv_sendDoneValid;
+    pr_popResponseQueue;
+  }
+
+  transition(S_C, NB_AckWB, S) {L2TagArrayWrite} {
+    ss_sendStaleNotification;
+    sdv_sendDoneValid;
+    pr_popResponseQueue;
+  }
+
+  // Begin Probe Transitions
+
+  transition({Ms, M0, M1, O}, {PrbInvData, PrbInvDataDemand}, I) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    i2_invL2;
+    ib_invBothClusters;
+    pp_popProbeQueue;
+  }
+
+  transition({Es, E0, E1, S, I}, {PrbInvData, PrbInvDataDemand}, I) {L2TagArrayRead, L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    ib_invBothClusters;
+    ii_invIcache;  // only relevant for S
+    pp_popProbeQueue;
+  }
+
+  transition(S_C, {PrbInvData, PrbInvDataDemand}, I_C) {L2TagArrayWrite} {
+    t_allocateTBE;
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, {PrbInvData, PrbInvDataDemand}, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms, M0, M1, O, Es, E0, E1, S, I}, PrbInv, I) {L2TagArrayRead, L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2; // nothing will happen in I
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(S_C, PrbInv, I_C) {L2TagArrayWrite} {
+    t_allocateTBE;
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(I_C, PrbInv, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms, M0, M1, O}, {PrbShrData, PrbShrDataDemand}, O) {L2TagArrayRead, L2TagArrayWrite, L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({Es, E0, E1, S}, {PrbShrData, PrbShrDataDemand}, S) {L2TagArrayRead, L2TagArrayWrite} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition(S_C, {PrbShrData, PrbShrDataDemand}) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({I, I_C}, {PrbShrData, PrbShrDataDemand}) {L2TagArrayRead} {
+    pb_sendProbeResponseBackprobe;
+    pp_popProbeQueue;
+  }
+
+  transition({I_M0, I_E0S}, {PrbInv, PrbInvData, PrbInvDataDemand}) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;  // must invalidate current data (only relevant for I_M0)
+    a0_allocateL1D;  // but make sure there is room for incoming data when it arrives
+    pp_popProbeQueue;
+  }
+
+  transition({I_M1, I_E1S}, {PrbInv, PrbInvData, PrbInvDataDemand}) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters; // must invalidate current data (only relevant for I_M1)
+    a1_allocateL1D;  // but make sure there is room for incoming data when it arrives
+    pp_popProbeQueue;
+  }
+
+  transition({I_M0M1, I_M1M0, I_M0Ms, I_M1Ms, I_ES}, {PrbInv, PrbInvData, PrbInvDataDemand, PrbShrData, PrbShrDataDemand}) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    a0_allocateL1D;
+    a1_allocateL1D;
+    pp_popProbeQueue;
+  }
+
+  transition({I_M0, I_E0S, I_M1, I_E1S}, {PrbShrData, PrbShrDataDemand}) {} {
+    pb_sendProbeResponseBackprobe;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, {PrbInvData, PrbInvDataDemand}, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, {PrbInvData, PrbInvDataDemand}, I_C) {} {
+    pdt_sendProbeResponseDataFromTBE;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, PrbInv, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, PrbInv, I_C) {} {
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    pp_popProbeQueue;
+  }
+
+  transition(ES_I, {PrbShrData, PrbShrDataDemand}, ES_I) {} {
+    ph_sendProbeResponseHit;
+    s_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_I, {PrbShrData, PrbShrDataDemand}, MO_I) {} {
+    pdt_sendProbeResponseDataFromTBE;
+    s_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S0, {PrbInvData, PrbInvDataDemand}, S0_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdt_sendProbeResponseDataFromTBE;
+    i2_invL2;
+    a2_allocateL2;
+    nS_issueRdBlkS;
+    d_deallocateTBE;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S1, {PrbInvData, PrbInvDataDemand}, S1_C) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdt_sendProbeResponseDataFromTBE;
+    i2_invL2;
+    a2_allocateL2;
+    nS_issueRdBlkS;
+    d_deallocateTBE;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S0, PrbInv, S0_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    a2_allocateL2;
+    nS_issueRdBlkS;
+    d_deallocateTBE;
+    pp_popProbeQueue;
+  }
+
+  transition(MO_S1, PrbInv, S1_C) {L2TagArrayWrite} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    i2_invL2;
+    a2_allocateL2;
+    nS_issueRdBlkS;
+    d_deallocateTBE;
+    pp_popProbeQueue;
+  }
+
+  transition({MO_S0, MO_S1}, {PrbShrData, PrbShrDataDemand}) {} {
+    pdt_sendProbeResponseDataFromTBE;
+    s_setSharedFlip;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F0, Es_F0, E0_F, E1_Es}, {PrbInvData, PrbInvDataDemand, PrbInv}, IF_E0S) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    // invalidate everything you've got
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    // but make sure you have room for what you need from the fill
+    a0_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F1, Es_F1, E1_F, E0_Es}, {PrbInvData, PrbInvDataDemand, PrbInv}, IF_E1S) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    // invalidate everything you've got
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    // but make sure you have room for what you need from the fill
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F, Es_F}, {PrbInvData, PrbInvDataDemand, PrbInv}, IF_ES) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    // invalidate everything you've got
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    // but make sure you have room for what you need from the fill
+    a0_allocateL1D;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition(Si_F0, {PrbInvData, PrbInvDataDemand, PrbInv}, F_S0) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition(Si_F1, {PrbInvData, PrbInvDataDemand, PrbInv}, F_S1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    nS_issueRdBlkS;
+    pp_popProbeQueue;
+  }
+
+  transition({Es_F0, E0_F, E1_Es}, {PrbShrData, PrbShrDataDemand}, S_F0) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({Es_F1, E1_F, E0_Es}, {PrbShrData, PrbShrDataDemand}, S_F1) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition(Es_F, {PrbShrData, PrbShrDataDemand}, S_F) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({S_F0, S_F1, S_F, Si_F0, Si_F1}, {PrbShrData, PrbShrDataDemand}) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition(S_M0, {PrbInvData, PrbInvDataDemand}, I_M0) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition(O_M0, {PrbInvData, PrbInvDataDemand}, I_M0) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdm_sendProbeResponseDataMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S_M0, O_M0}, {PrbInv}, I_M0) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition(S_M1, {PrbInvData, PrbInvDataDemand}, I_M1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition(O_M1, {PrbInvData, PrbInvDataDemand}, I_M1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pdm_sendProbeResponseDataMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S_M1, O_M1}, {PrbInv}, I_M1) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pim_sendProbeResponseInvMs;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S0, S0_C}, {PrbInvData, PrbInvDataDemand, PrbInv}) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S1, S1_C}, {PrbInvData, PrbInvDataDemand, PrbInv}) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    ii_invIcache;
+    i2_invL2;
+    ai_allocateL1I;
+    a2_allocateL2;
+    pp_popProbeQueue;
+  }
+
+  transition({S_M0, S_M1}, {PrbShrData, PrbShrDataDemand}) {} {
+    ph_sendProbeResponseHit;
+    pp_popProbeQueue;
+  }
+
+  transition({O_M0, O_M1}, {PrbShrData, PrbShrDataDemand}) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({S0, S1, S0_C, S1_C}, {PrbShrData, PrbShrDataDemand}) {} {
+    pb_sendProbeResponseBackprobe;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F0, M0_F, M1_Ms, O_F0}, {PrbInvData, PrbInvDataDemand}, IF_E0S) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F1, M1_F, M0_Ms, O_F1}, {PrbInvData, PrbInvDataDemand}, IF_E1S) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    ib_invBothClusters;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F, O_F}, {PrbInvData, PrbInvDataDemand}, IF_ES) {L2DataArrayRead} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pd_sendProbeResponseData;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F0, M0_F, M1_Ms, O_F0}, PrbInv, IF_E0S) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F1, M1_F, M0_Ms, O_F1}, PrbInv, IF_E1S) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    i2_invL2;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F, O_F}, PrbInv, IF_ES) {} {
+    forward_eviction_to_cpu0;
+    forward_eviction_to_cpu1;
+    pi_sendProbeResponseInv;
+    ib_invBothClusters;
+    i2_invL2;
+    a0_allocateL1D;
+    a1_allocateL1D;
+    a2_allocateL2;
+    n_issueRdBlk;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F0, M0_F, M1_Ms}, {PrbShrData, PrbShrDataDemand}, O_F0) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({Ms_F1, M1_F, M0_Ms}, {PrbShrData, PrbShrDataDemand}, O_F1) {} {
+  }
+
+  transition({Ms_F}, {PrbShrData, PrbShrDataDemand}, O_F) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  transition({O_F0, O_F1, O_F}, {PrbShrData, PrbShrDataDemand}) {L2DataArrayRead} {
+    pd_sendProbeResponseData;
+    pp_popProbeQueue;
+  }
+
+  // END TRANSITIONS
+}
+
+
diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm b/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm
new file mode 100644
index 000000000..52d87fb8b
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm
@@ -0,0 +1,2038 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:Directory, "AMD_Base-like protocol")
+: DirectoryMemory * directory;
+  CacheMemory * L3CacheMemory;
+  Cycles response_latency := 5;
+  Cycles response_latency_regionDir := 1;
+  Cycles l3_hit_latency := 30;
+  bool useL3OnWT := "False";
+  Cycles to_memory_controller_latency := 1;
+
+  // From the Cores
+  MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseFromCores, network="From", virtual_network="2", vnet_type="response";
+  MessageBuffer * unblockFromCores, network="From", virtual_network="4", vnet_type="unblock";
+
+  // To the Cores
+  MessageBuffer * probeToCore, network="To", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseToCore, network="To", virtual_network="2", vnet_type="response";
+
+  // From region buffer
+  MessageBuffer * reqFromRegBuf, network="From", virtual_network="7", vnet_type="request";
+
+  // To Region directory
+  MessageBuffer * reqToRegDir, network="To", virtual_network="5", vnet_type="request";
+  MessageBuffer * reqFromRegDir, network="From", virtual_network="5", vnet_type="request";
+  MessageBuffer * unblockToRegDir, network="To", virtual_network="4", vnet_type="unblock";
+
+  MessageBuffer * triggerQueue;
+  MessageBuffer * L3triggerQueue;
+  MessageBuffer * responseFromMemory;
+{
+  // STATES
+  state_declaration(State, desc="Directory states", default="Directory_State_U") {
+    U, AccessPermission:Backing_Store,                 desc="unblocked";
+    BR, AccessPermission:Backing_Store,                  desc="got CPU read request, blocked while sent to L3";
+    BW, AccessPermission:Backing_Store,                  desc="got CPU write request, blocked while sent to L3";
+    BL, AccessPermission:Busy,                  desc="got L3 WB request";
+    // BL is Busy because it's possible for the data only to be in the network
+    // in the WB, L3 has sent it and gone on with its business in possibly I
+    // state.
+    BI, AccessPermission:Backing_Store,                   desc="Blocked waiting for inv ack from core";
+    BS_M, AccessPermission:Backing_Store,                 desc="blocked waiting for memory";
+    BM_M, AccessPermission:Backing_Store,                 desc="blocked waiting for memory";
+    B_M, AccessPermission:Backing_Store,                 desc="blocked waiting for memory";
+    BP, AccessPermission:Backing_Store,                 desc="blocked waiting for probes, no need for memory";
+    BS_PM, AccessPermission:Backing_Store,                desc="blocked waiting for probes and Memory";
+    BM_PM, AccessPermission:Backing_Store,                desc="blocked waiting for probes and Memory";
+    B_PM, AccessPermission:Backing_Store,                desc="blocked waiting for probes and Memory";
+    BS_Pm, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    BM_Pm, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    B_Pm, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    B, AccessPermission:Backing_Store,                  desc="sent response, Blocked til ack";
+
+    // These are needed for when a private requests was issued before an inv was received
+    // for writebacks
+    BS_Pm_BL, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    BM_Pm_BL, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    B_Pm_BL, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    BP_BL, AccessPermission:Backing_Store,                 desc="blocked waiting for probes, no need for memory";
+    // for reads
+    BS_Pm_B, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    BM_Pm_B, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    B_Pm_B, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    BP_B, AccessPermission:Backing_Store,                 desc="blocked waiting for probes, no need for memory";
+  }
+
+  // Events
+  enumeration(Event, desc="Directory events") {
+    // CPU requests
+    RdBlkS,             desc="...";
+    RdBlkM,             desc="...";
+    RdBlk,              desc="...";
+    WriteThrough,       desc="WriteThrough Message";
+    Atomic,             desc="Atomic Message";
+
+    RdBlkSP,             desc="...";
+    RdBlkMP,             desc="...";
+    RdBlkP,              desc="...";
+    VicDirtyP,           desc="...";
+    VicCleanP,           desc="...";
+    WriteThroughP,       desc="WriteThrough Message";
+    AtomicP,             desc="Atomic Message";
+
+    // writebacks
+    VicDirty,           desc="...";
+    VicClean,           desc="...";
+    CPUData,            desc="WB data from CPU";
+    StaleWB,            desc="WB response for a no longer valid request";
+
+    // probe responses
+    CPUPrbResp,            desc="Probe Response Msg";
+    LastCPUPrbResp,        desc="Last Probe Response Msg";
+
+    ProbeAcksComplete,  desc="Probe Acks Complete";
+
+    L3Hit,              desc="Hit in L3 return data to core";
+
+    // Memory Controller
+    MemData, desc="Fetched data from memory arrives";
+    WBAck, desc="Writeback Ack from memory arrives";
+
+    CoreUnblock,            desc="Core received data, unblock";
+    UnblockWriteThrough,    desc="unblock, self triggered";
+
+    StaleVicDirty,       desc="Core invalidated before VicDirty processed";
+    StaleVicDirtyP,       desc="Core invalidated before VicDirty processed";
+
+    // For region protocol
+    CPUReq,              desc="Generic CPU request";
+    Inv,                 desc="Region dir needs a block invalidated";
+    Downgrade,           desc="Region dir needs a block downgraded";
+
+    // For private accesses (bypassed reg-dir)
+    CPUReadP,            desc="Initial req from core, sent to L3";
+    CPUWriteP,           desc="Initial req from core, sent to L3";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    L3DataArrayRead,    desc="Read the data array";
+    L3DataArrayWrite,   desc="Write the data array";
+    L3TagArrayRead,     desc="Read the data array";
+    L3TagArrayWrite,    desc="Write the data array";
+  }
+
+  // TYPES
+
+  // DirectoryEntry
+  structure(Entry, desc="...", interface="AbstractEntry") {
+    State DirectoryState,          desc="Directory state";
+    DataBlock DataBlk,             desc="data for the block";
+    NetDest VicDirtyIgnore,  desc="VicDirty coming from whom to ignore";
+  }
+
+  structure(CacheEntry, desc="...", interface="AbstractCacheEntry") {
+    DataBlock DataBlk,          desc="data for the block";
+    MachineID LastSender,       desc="Mach which this block came from";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,     desc="Transient state";
+    DataBlock DataBlk,  desc="data for the block";
+    DataBlock DataBlkAux,  desc="Auxiliary data for the block";
+    bool Dirty,         desc="Is the data dirty?";
+    int NumPendingAcks,        desc="num acks expected";
+    MachineID OriginalRequestor,        desc="Original Requestor";
+    MachineID WTRequestor,        desc="WT Requestor";
+    bool Cached,        desc="data hit in Cache";
+    bool MemData,       desc="Got MemData?",default="false";
+    bool wtData,       desc="Got write through data?",default="false";
+    bool atomicData,   desc="Got Atomic op?",default="false";
+    Cycles InitialRequestTime, desc="...";
+    Cycles ForwardRequestTime, desc="...";
+    Cycles ProbeRequestStartTime, desc="...";
+    bool DemandRequest, desc="for profiling";
+    MachineID LastSender, desc="Mach which this block came from";
+    bool L3Hit, default="false", desc="Was this an L3 hit?";
+    bool TriggeredAcksComplete, default="false", desc="True if already triggered acks complete";
+    WriteMask writeMask,    desc="outstanding write through mask";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs";
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  void set_tbe(TBE a);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  Entry getDirectoryEntry(Addr addr), return_by_pointer="yes" {
+    Entry dir_entry := static_cast(Entry, "pointer", directory.lookup(addr));
+
+    if (is_valid(dir_entry)) {
+      //DPRINTF(RubySlicc, "Getting entry %s: %s\n", addr, dir_entry.DataBlk);
+      return dir_entry;
+    }
+
+    dir_entry :=  static_cast(Entry, "pointer",
+                              directory.allocate(addr, new Entry));
+    return dir_entry;
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if (is_valid(tbe) && tbe.MemData) {
+      DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe);
+      return tbe.DataBlk;
+    }
+    DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr));
+    return getDirectoryEntry(addr).DataBlk;
+  }
+
+  State getState(TBE tbe, CacheEntry entry, Addr addr) {
+    return getDirectoryEntry(addr).DirectoryState;
+  }
+
+  State getStateFromAddr(Addr addr) {
+    return getDirectoryEntry(addr).DirectoryState;
+  }
+
+  void setState(TBE tbe, CacheEntry entry, Addr addr, State state) {
+    getDirectoryEntry(addr).DirectoryState := state;
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    // For this Directory, all permissions are just tracked in Directory, since
+    // it's not possible to have something in TBE but not Dir, just keep track
+    // of state all in one place.
+    if(directory.isPresent(addr)) {
+      return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  void setAccessPermission(CacheEntry entry, Addr addr, State state) {
+    getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state));
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L3DataArrayRead) {
+      L3CacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L3DataArrayWrite) {
+      L3CacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L3TagArrayRead) {
+      L3CacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L3TagArrayWrite) {
+      L3CacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L3DataArrayRead) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L3DataArrayWrite) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L3TagArrayRead) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L3TagArrayWrite) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+  // ** OUT_PORTS **
+  out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore);
+  out_port(responseNetwork_out, ResponseMsg, responseToCore);
+
+  out_port(requestNetworkReg_out, CPURequestMsg, reqToRegDir);
+  out_port(regAckNetwork_out, UnblockMsg, unblockToRegDir);
+
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+  out_port(L3TriggerQueue_out, TriggerMsg, L3triggerQueue);
+
+  // ** IN_PORTS **
+
+  // Trigger Queue
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=7) {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == TriggerType:AcksComplete) {
+          trigger(Event:ProbeAcksComplete, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == TriggerType:UnblockWriteThrough) {
+          trigger(Event:UnblockWriteThrough, in_msg.addr, entry, tbe);
+        } else {
+          error("Unknown trigger msg");
+        }
+      }
+    }
+  }
+
+  in_port(L3TriggerQueue_in, TriggerMsg, L3triggerQueue, rank=6) {
+    if (L3TriggerQueue_in.isReady(clockEdge())) {
+      peek(L3TriggerQueue_in, TriggerMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == TriggerType:L3Hit) {
+          trigger(Event:L3Hit, in_msg.addr, entry, tbe);
+        } else {
+          error("Unknown trigger msg");
+        }
+      }
+    }
+  }
+
+  // Unblock Network
+  in_port(unblockNetwork_in, UnblockMsg, unblockFromCores, rank=5) {
+    if (unblockNetwork_in.isReady(clockEdge())) {
+      peek(unblockNetwork_in, UnblockMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        trigger(Event:CoreUnblock, in_msg.addr, entry, tbe);
+      }
+    }
+  }
+
+  // Core response network
+  in_port(responseNetwork_in, ResponseMsg, responseFromCores, rank=4) {
+    if (responseNetwork_in.isReady(clockEdge())) {
+      peek(responseNetwork_in, ResponseMsg) {
+          DPRINTF(RubySlicc, "core responses %s\n", in_msg);
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+          if (is_valid(tbe) && tbe.NumPendingAcks == 1
+            && tbe.TriggeredAcksComplete == false) {
+            trigger(Event:LastCPUPrbResp, in_msg.addr, entry, tbe);
+          } else {
+            trigger(Event:CPUPrbResp, in_msg.addr, entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceResponseType:CPUData) {
+          trigger(Event:CPUData, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
+            trigger(Event:StaleWB, in_msg.addr, entry, tbe);
+        } else {
+          error("Unexpected response type");
+        }
+      }
+    }
+  }
+
+  // off-chip memory request/response is done
+  in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=3) {
+    if (memQueue_in.isReady(clockEdge())) {
+      peek(memQueue_in, MemoryMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
+          trigger(Event:MemData, in_msg.addr, entry, tbe);
+          DPRINTF(RubySlicc, "%s\n", in_msg);
+        } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
+          trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them.
+        } else {
+          DPRINTF(RubySlicc, "%s\n", in_msg.Type);
+          error("Invalid message");
+        }
+      }
+    }
+  }
+
+  in_port(regBuf_in, CPURequestMsg, reqFromRegBuf, rank=2) {
+    if (regBuf_in.isReady(clockEdge())) {
+      peek(regBuf_in, CPURequestMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == CoherenceRequestType:ForceInv) {
+          trigger(Event:Inv, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:ForceDowngrade) {
+          trigger(Event:Downgrade, in_msg.addr, entry, tbe);
+        } else {
+          error("Bad request from region buffer");
+        }
+      }
+    }
+  }
+
+  in_port(regDir_in, CPURequestMsg, reqFromRegDir, rank=1) {
+    if (regDir_in.isReady(clockEdge())) {
+      peek(regDir_in, CPURequestMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == CoherenceRequestType:RdBlk) {
+          trigger(Event:RdBlk, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+          trigger(Event:RdBlkS, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+          trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+          trigger(Event:Atomic, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+          trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+          if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+            DPRINTF(RubySlicc, "Dropping VicDirty for address %s\n", in_msg.addr);
+            trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+          } else {
+            trigger(Event:VicDirty, in_msg.addr, entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+          if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+            DPRINTF(RubySlicc, "Dropping VicClean for address %s\n", in_msg.addr);
+            trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+          } else {
+            trigger(Event:VicClean, in_msg.addr, entry, tbe);
+          }
+        } else {
+          error("Bad message type fwded from Region Dir");
+        }
+      }
+    }
+  }
+
+  in_port(requestNetwork_in, CPURequestMsg, requestFromCores, rank=0) {
+    if (requestNetwork_in.isReady(clockEdge())) {
+      peek(requestNetwork_in, CPURequestMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Private) {
+          // Bypass the region dir
+          if (in_msg.Type == CoherenceRequestType:RdBlk) {
+            trigger(Event:RdBlkP, in_msg.addr, entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+            trigger(Event:RdBlkSP, in_msg.addr, entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+            trigger(Event:RdBlkMP, in_msg.addr, entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+            trigger(Event:AtomicP, in_msg.addr, entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+            trigger(Event:WriteThroughP, in_msg.addr, entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+            if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+              DPRINTF(RubySlicc, "Dropping VicDirtyP for address %s\n", in_msg.addr);
+              trigger(Event:StaleVicDirtyP, in_msg.addr, entry, tbe);
+            } else {
+              DPRINTF(RubySlicc, "Got VicDirty from %s on %s\n", in_msg.Requestor, in_msg.addr);
+              trigger(Event:VicDirtyP, in_msg.addr, entry, tbe);
+            }
+          } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+            if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+              DPRINTF(RubySlicc, "Dropping VicCleanP for address %s\n", in_msg.addr);
+              trigger(Event:StaleVicDirtyP, in_msg.addr, entry, tbe);
+            } else {
+              DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr);
+              trigger(Event:VicCleanP, in_msg.addr, entry, tbe);
+            }
+          } else {
+            error("Bad message type for private access");
+          }
+        } else {
+          trigger(Event:CPUReq, in_msg.addr, entry, tbe);
+        }
+      }
+    }
+  }
+
+  // Actions
+  action(s_sendResponseS, "s", desc="send Shared response") {
+    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:NBSysResp;
+      if (tbe.L3Hit) {
+        out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+      } else {
+        out_msg.Sender := machineID;
+      }
+      out_msg.Destination.add(tbe.OriginalRequestor);
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := false;
+      out_msg.State := CoherenceState:Shared;
+      out_msg.InitialRequestTime := tbe.InitialRequestTime;
+      out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+      out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+      out_msg.OriginalResponder := tbe.LastSender;
+      out_msg.DemandRequest := tbe.DemandRequest;
+      out_msg.L3Hit := tbe.L3Hit;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(es_sendResponseES, "es", desc="send Exclusive or Shared response") {
+    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:NBSysResp;
+      if (tbe.L3Hit) {
+        out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+      } else {
+        out_msg.Sender := machineID;
+      }
+      out_msg.Destination.add(tbe.OriginalRequestor);
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := tbe.Dirty;
+      if (tbe.Cached) {
+        out_msg.State := CoherenceState:Shared;
+      } else {
+        out_msg.State := CoherenceState:Exclusive;
+      }
+      out_msg.InitialRequestTime := tbe.InitialRequestTime;
+      out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+      out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+      out_msg.OriginalResponder := tbe.LastSender;
+      out_msg.DemandRequest := tbe.DemandRequest;
+      out_msg.L3Hit := tbe.L3Hit;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(m_sendResponseM, "m", desc="send Modified response") {
+    if (tbe.wtData) {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:UnblockWriteThrough;
+      }
+    } else {
+      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysResp;
+        if (tbe.L3Hit) {
+          out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+        } else {
+          out_msg.Sender := machineID;
+        }
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.Dirty := tbe.Dirty;
+        out_msg.State := CoherenceState:Modified;
+        out_msg.CtoD := false;
+        out_msg.InitialRequestTime := tbe.InitialRequestTime;
+        out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+        out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+        out_msg.OriginalResponder := tbe.LastSender;
+        out_msg.DemandRequest := tbe.DemandRequest;
+        out_msg.L3Hit := tbe.L3Hit;
+        if (tbe.atomicData) {
+          out_msg.WTRequestor := tbe.WTRequestor;
+        }
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+      if (tbe.atomicData) {
+        enqueue(triggerQueue_out, TriggerMsg, 1) {
+          out_msg.addr := address;
+          out_msg.Type := TriggerType:UnblockWriteThrough;
+        }
+      }
+    }
+  }
+
+  action(sb_sendResponseSBypass, "sb", desc="send Shared response") {
+    peek(requestNetwork_in, CPURequestMsg) {
+    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:NBSysResp;
+      if (tbe.L3Hit) {
+        out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+      } else {
+        out_msg.Sender := machineID;
+      }
+      out_msg.Destination.add(in_msg.Requestor);
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := false;
+      out_msg.State := CoherenceState:Shared;
+      out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+      out_msg.ForwardRequestTime := curCycle();
+      out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+      out_msg.OriginalResponder := tbe.LastSender;
+      out_msg.DemandRequest := false;
+      out_msg.L3Hit := tbe.L3Hit;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+    }
+  }
+
+  action(esb_sendResponseESBypass, "esb", desc="send Exclusive or Shared response") {
+    peek(requestNetwork_in, CPURequestMsg) {
+    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:NBSysResp;
+      if (tbe.L3Hit) {
+        out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+      } else {
+        out_msg.Sender := machineID;
+      }
+      out_msg.Destination.add(in_msg.Requestor);
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := tbe.Dirty;
+      if (tbe.Cached || in_msg.ForceShared) {
+        out_msg.State := CoherenceState:Shared;
+      } else {
+        out_msg.State := CoherenceState:Exclusive;
+      }
+      out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+      out_msg.ForwardRequestTime := curCycle();
+      out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+      out_msg.OriginalResponder := tbe.LastSender;
+      out_msg.DemandRequest := false;
+      out_msg.L3Hit := tbe.L3Hit;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+    }
+  }
+
+  action(mbwt_sendResponseWriteThroughBypass, "mbwt", desc="send write through response") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:NBSysWBAck;
+          out_msg.Destination.add(in_msg.Requestor);
+          out_msg.WTRequestor := in_msg.WTRequestor;
+          out_msg.Sender := machineID;
+          out_msg.MessageSize := MessageSizeType:Writeback_Control;
+          out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+          out_msg.ForwardRequestTime := curCycle();
+          out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+          out_msg.DemandRequest := false;
+        }
+      } else {
+        assert(in_msg.Type == CoherenceRequestType:Atomic);
+        enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:NBSysResp;
+          if (tbe.L3Hit) {
+            out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+          } else {
+            out_msg.Sender := machineID;
+          }
+          out_msg.Destination.add(in_msg.Requestor);
+          out_msg.DataBlk := getDirectoryEntry(address).DataBlk;
+          out_msg.MessageSize := MessageSizeType:Response_Data;
+          out_msg.Dirty := in_msg.Dirty;
+          out_msg.State := CoherenceState:Modified;
+          out_msg.CtoD := false;
+          out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+          out_msg.ForwardRequestTime := curCycle();
+          out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+          out_msg.OriginalResponder := tbe.LastSender;
+          out_msg.DemandRequest := false;
+          out_msg.L3Hit := tbe.L3Hit;
+          out_msg.WTRequestor := in_msg.WTRequestor;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+      }
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:UnblockWriteThrough;
+      }
+    }
+  }
+
+  action(mb_sendResponseMBypass, "mb", desc="send Modified response") {
+    peek(requestNetwork_in, CPURequestMsg) {
+    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:NBSysResp;
+      if (tbe.L3Hit) {
+        out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+      } else {
+        out_msg.Sender := machineID;
+      }
+      out_msg.Destination.add(in_msg.Requestor);
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := tbe.Dirty;
+      out_msg.State := CoherenceState:Modified;
+      out_msg.CtoD := false;
+      out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+      out_msg.ForwardRequestTime := curCycle();
+      out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+      out_msg.OriginalResponder := tbe.LastSender;
+      out_msg.DemandRequest := false;
+      out_msg.L3Hit := tbe.L3Hit;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+    }
+  }
+
+  action(c_sendResponseCtoD, "c", desc="send CtoD Ack") {
+      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        out_msg.Dirty := false;
+        out_msg.State := CoherenceState:Modified;
+        out_msg.CtoD := true;
+        out_msg.InitialRequestTime := tbe.InitialRequestTime;
+        out_msg.ForwardRequestTime := curCycle();
+        out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+        out_msg.DemandRequest := tbe.DemandRequest;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+  }
+
+  action(cp_sendResponseCtoDP, "cp", desc="send CtoD Ack") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        out_msg.Dirty := false;
+        out_msg.State := CoherenceState:Modified;
+        out_msg.CtoD := true;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ForwardRequestTime := curCycle();
+        out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+        out_msg.DemandRequest := false;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+    peek(regDir_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysWBAck;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.WTRequestor := in_msg.WTRequestor;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
+        out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+        out_msg.DemandRequest := false;
+      }
+    }
+  }
+
+  action(wp_sendResponseWBAckP, "wp", desc="send WB Ack") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysWBAck;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.WTRequestor := in_msg.WTRequestor;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ForwardRequestTime := curCycle();
+        out_msg.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+        out_msg.DemandRequest := false;
+      }
+    }
+  }
+
+  action(wc_sendResponseWBAck, "wc", desc="send WB Ack for cancel") {
+    peek(responseNetwork_in, ResponseMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysWBAck;
+        out_msg.Destination.add(in_msg.Sender);
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+      }
+    }
+  }
+
+  action(ra_ackRegionDir, "ra", desc="Ack region dir") {
+    peek(regDir_in, CPURequestMsg) {
+      if (in_msg.NoAckNeeded == false) {
+        enqueue(responseNetwork_out, ResponseMsg, response_latency_regionDir) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:DirReadyAck;
+          out_msg.Destination.add(map_Address_to_RegionDir(address));
+          out_msg.Sender := machineID;
+          out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        }
+      }
+    }
+  }
+
+  action(l_queueMemRdReq, "lr", desc="Read data from memory") {
+    peek(regDir_in, CPURequestMsg) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) {
+          out_msg.addr := address;
+          out_msg.Type := TriggerType:L3Hit;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        tbe.DataBlk := entry.DataBlk;
+        tbe.LastSender := entry.LastSender;
+        tbe.L3Hit := true;
+        tbe.MemData := true;
+        DPRINTF(RubySlicc, "L3 data is %s\n", entry.DataBlk);
+        L3CacheMemory.deallocate(address);
+      } else {
+        queueMemoryRead(machineID, address, to_memory_controller_latency);
+      }
+    }
+  }
+
+  action(lrp_queueMemRdReqP, "lrp", desc="Read data from memory") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) {
+          out_msg.addr := address;
+          out_msg.Type := TriggerType:L3Hit;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        tbe.DataBlk := entry.DataBlk;
+        tbe.LastSender := entry.LastSender;
+        tbe.L3Hit := true;
+        tbe.MemData := true;
+        DPRINTF(RubySlicc, "L3 data is %s\n", entry.DataBlk);
+        L3CacheMemory.deallocate(address);
+      } else {
+        queueMemoryRead(machineID, address, to_memory_controller_latency);
+      }
+    }
+  }
+
+  action(dcr_probeInvCoreData, "dcr", desc="probe inv cores, return data") {
+    peek(regBuf_in, CPURequestMsg) {
+      enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbInv;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination := in_msg.Sharers;
+        tbe.NumPendingAcks := tbe.NumPendingAcks + in_msg.Sharers.count();
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+        APPEND_TRANSITION_COMMENT(" dcr: Acks remaining: ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+        tbe.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(ddr_probeDownCoreData, "ddr", desc="probe inv cores, return data") {
+    peek(regBuf_in, CPURequestMsg) {
+      enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbDowngrade;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination := in_msg.Sharers;
+        tbe.NumPendingAcks := tbe.NumPendingAcks + in_msg.Sharers.count();
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+        APPEND_TRANSITION_COMMENT(" dcr: Acks remaining: ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+        tbe.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") {
+    peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+      enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbDowngrade;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination.broadcast(MachineType:CorePair);  // won't be realistic for multisocket
+        tbe.NumPendingAcks := tbe.NumPendingAcks +machineCount(MachineType:CorePair) - 1;
+        out_msg.Destination.broadcast(MachineType:TCP);
+        tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:TCP);
+        out_msg.Destination.broadcast(MachineType:SQC);
+        tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:SQC);
+        out_msg.Destination.remove(in_msg.Requestor);
+        DPRINTF(RubySlicc, "%s\n", (out_msg));
+        APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+        tbe.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(ic_probeInvCore, "ic", desc="probe invalidate core, no return data needed") {
+    peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+      enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbInv;
+        out_msg.ReturnData := false;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination.broadcast(MachineType:CorePair);  // won't be realistic for multisocket
+        tbe.NumPendingAcks := tbe.NumPendingAcks +machineCount(MachineType:CorePair) - 1;
+        out_msg.Destination.broadcast(MachineType:TCP);
+        tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:TCP);
+        out_msg.Destination.broadcast(MachineType:SQC);
+        tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:SQC);
+        out_msg.Destination.remove(in_msg.Requestor);
+        APPEND_TRANSITION_COMMENT(" ic: Acks remaining: ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+        tbe.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(d_writeDataToMemory, "d", desc="Write data to memory") {
+    peek(responseNetwork_in, ResponseMsg) {
+      getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
+      DPRINTF(RubySlicc, "Writing Data: %s to address %s\n", in_msg.DataBlk,
+              in_msg.addr);
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    peek(regDir_in, CPURequestMsg) {
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        tbe.writeMask.clear();
+        tbe.writeMask.orMask(in_msg.writeMask);
+        tbe.wtData := true;
+        tbe.WTRequestor := in_msg.WTRequestor;
+        tbe.LastSender := in_msg.Requestor;
+      }
+      if (in_msg.Type == CoherenceRequestType:Atomic) {
+        tbe.writeMask.clear();
+        tbe.writeMask.orMask(in_msg.writeMask);
+        tbe.atomicData := true;
+        tbe.WTRequestor := in_msg.WTRequestor;
+        tbe.LastSender := in_msg.Requestor;
+      }
+      tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
+      tbe.Dirty := false;
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        tbe.DataBlk.copyPartial(in_msg.DataBlk,tbe.writeMask);
+        tbe.Dirty := false;
+      }
+      tbe.OriginalRequestor := in_msg.Requestor;
+      tbe.NumPendingAcks := 0;
+      tbe.Cached := in_msg.ForceShared;
+      tbe.InitialRequestTime := in_msg.InitialRequestTime;
+      tbe.ForwardRequestTime := curCycle();
+      tbe.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+      tbe.DemandRequest := in_msg.DemandRequest;
+    }
+  }
+
+  action(tp_allocateTBEP, "tp", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    peek(requestNetwork_in, CPURequestMsg) {
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        tbe.writeMask.clear();
+        tbe.writeMask.orMask(in_msg.writeMask);
+        tbe.wtData := true;
+        tbe.WTRequestor := in_msg.WTRequestor;
+        tbe.LastSender := in_msg.Requestor;
+      }
+      if (in_msg.Type == CoherenceRequestType:Atomic) {
+        tbe.writeMask.clear();
+        tbe.writeMask.orMask(in_msg.writeMask);
+        tbe.atomicData := true;
+        tbe.WTRequestor := in_msg.WTRequestor;
+        tbe.LastSender := in_msg.Requestor;
+      }
+      tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
+      tbe.Dirty := false;
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        tbe.DataBlk.copyPartial(in_msg.DataBlk,tbe.writeMask);
+        tbe.Dirty := false;
+      }
+      tbe.OriginalRequestor := in_msg.Requestor;
+      tbe.NumPendingAcks := 0;
+      tbe.Cached := in_msg.ForceShared;
+      tbe.InitialRequestTime := in_msg.InitialRequestTime;
+      tbe.ForwardRequestTime := curCycle();
+      tbe.ProbeRequestStartTime := in_msg.ProbeRequestStartTime;
+      tbe.DemandRequest := false;
+    }
+  }
+
+  action(sa_setAcks, "sa", desc="setAcks") {
+    peek(regDir_in, CPURequestMsg) {
+        tbe.NumPendingAcks := in_msg.Acks;
+        APPEND_TRANSITION_COMMENT(" waiting for acks ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+    }
+  }
+
+  action(tr_allocateTBE, "tr", desc="allocate TBE Entry for Region inv") {
+    check_allocate(TBEs);
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+    tbe.NumPendingAcks := 0;
+  }
+
+  action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(wdp_writeBackDataPrivate, "wdp", desc="Write back data if needed") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        tbe.DataBlkAux := getDirectoryEntry(address).DataBlk;
+        tbe.DataBlkAux.copyPartial(in_msg.DataBlk,in_msg.writeMask);
+        getDirectoryEntry(address).DataBlk := tbe.DataBlkAux;
+      } else{
+        assert(in_msg.Type == CoherenceRequestType:Atomic);
+        tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask);
+        getDirectoryEntry(address).DataBlk := tbe.DataBlkAux;
+      }
+    }
+  }
+
+  action(wd_writeBackData, "wd", desc="Write back data if needed") {
+    if (tbe.wtData) {
+      DataBlock tmp := getDirectoryEntry(address).DataBlk;
+      tmp.copyPartial(tbe.DataBlk,tbe.writeMask);
+      tbe.DataBlk := tmp;
+      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    } else if (tbe.atomicData) {
+      tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask);
+      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    } else if (tbe.Dirty == true) {
+      APPEND_TRANSITION_COMMENT(" Wrote data back ");
+      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    }
+  }
+
+  action(wdi_writeBackDataInv, "wdi", desc="Write back inv data if needed") {
+    // Kind of opposite from above...?
+    if (tbe.Dirty == true) {
+      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+      APPEND_TRANSITION_COMMENT("Writing dirty data to dir");
+      DPRINTF(RubySlicc, "Data %s: %s\n", address, tbe.DataBlk);
+    } else {
+      APPEND_TRANSITION_COMMENT("NOT!!! Writing dirty data to dir");
+    }
+  }
+
+  action(wdt_writeBackDataInvNoTBE, "wdt", desc="Write back inv data if needed no TBE") {
+    // Kind of opposite from above...?
+    peek(responseNetwork_in, ResponseMsg) {
+      if (in_msg.Dirty == true) {
+        getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
+        APPEND_TRANSITION_COMMENT("Writing dirty data to dir");
+        DPRINTF(RubySlicc, "Data %s: %s\n", address, in_msg.DataBlk);
+      } else {
+        APPEND_TRANSITION_COMMENT("NOT!!! Writing dirty data to dir");
+      }
+    }
+  }
+
+  action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") {
+    peek(memQueue_in, MemoryMsg) {
+      if (tbe.Dirty == false) {
+        tbe.DataBlk := getDirectoryEntry(address).DataBlk;
+      }
+      tbe.MemData := true;
+    }
+  }
+
+  action(ml_writeL3DataToTBE, "ml", desc="write L3 data to TBE") {
+    assert(tbe.Dirty == false);
+    CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+    tbe.DataBlk := entry.DataBlk;
+    tbe.LastSender := entry.LastSender;
+    tbe.L3Hit := true;
+    tbe.MemData := true;
+  }
+
+  action(y_writeProbeDataToTBE, "y", desc="write Probe Data to TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (in_msg.Dirty) {
+        DPRINTF(RubySlicc, "Got dirty data for %s from %s\n", address, in_msg.Sender);
+        DPRINTF(RubySlicc, "Data is %s\n", in_msg.DataBlk);
+        if (tbe.wtData) {
+          DataBlock tmp := in_msg.DataBlk;
+          tmp.copyPartial(tbe.DataBlk,tbe.writeMask);
+          tbe.DataBlk := tmp;
+        } else if (tbe.Dirty) {
+          if(tbe.atomicData == false && tbe.wtData == false) {
+            DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
+            assert(tbe.DataBlk == in_msg.DataBlk);  // in case of double data
+          }
+        } else {
+          tbe.DataBlk := in_msg.DataBlk;
+          tbe.Dirty := in_msg.Dirty;
+          tbe.LastSender := in_msg.Sender;
+        }
+      }
+      if (in_msg.Hit) {
+        tbe.Cached := true;
+      }
+    }
+  }
+
+  action(yc_writeCPUDataToTBE, "yc", desc="write CPU Data to TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (in_msg.Dirty) {
+        DPRINTF(RubySlicc, "Got dirty data for %s from %s\n", address, in_msg.Sender);
+        DPRINTF(RubySlicc, "Data is %s\n", in_msg.DataBlk);
+        if (tbe.Dirty) {
+          DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
+          assert(tbe.DataBlk == in_msg.DataBlk);  // in case of double data
+        }
+        tbe.DataBlk := in_msg.DataBlk;
+        tbe.Dirty := false;
+        tbe.LastSender := in_msg.Sender;
+      }
+    }
+  }
+
+  action(x_decrementAcks, "x", desc="decrement Acks pending") {
+    if (tbe.NumPendingAcks > 0) {
+      tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+    } else {
+      APPEND_TRANSITION_COMMENT(" Double ack! ");
+    }
+    assert(tbe.NumPendingAcks >= 0);
+    APPEND_TRANSITION_COMMENT(" Acks remaining: ");
+    APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+  }
+
+  action(o_checkForCompletion, "o", desc="check for ack completion") {
+    if (tbe.NumPendingAcks == 0 && tbe.TriggeredAcksComplete == false) {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:AcksComplete;
+      }
+      tbe.TriggeredAcksComplete := true;
+    }
+    APPEND_TRANSITION_COMMENT(" Check: Acks remaining: ");
+    APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+  }
+
+  action(ont_checkForCompletionNoTrigger, "ont", desc="check for ack completion, no trigger") {
+    if (tbe.NumPendingAcks == 0 && tbe.TriggeredAcksComplete == false) {
+      tbe.TriggeredAcksComplete := true;
+    }
+    APPEND_TRANSITION_COMMENT(" Check: Acks remaining: ");
+    APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+  }
+
+  action(rvp_removeVicDirtyIgnore, "rvp", desc="Remove ignored core") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor);
+    }
+  }
+
+  action(rv_removeVicDirtyIgnore, "rv", desc="Remove ignored core") {
+    peek(regDir_in, CPURequestMsg) {
+      getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor);
+    }
+  }
+
+  action(r_sendRequestToRegionDir, "r", desc="send request to Region Directory") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(requestNetworkReg_out, CPURequestMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := in_msg.Type;
+        out_msg.Requestor := in_msg.Requestor;
+        out_msg.Destination.add(map_Address_to_RegionDir(address));
+        out_msg.Shared := in_msg.Shared;
+        out_msg.MessageSize := in_msg.MessageSize;
+        DPRINTF(RubySlicc, "out dest: %s\n", map_Address_to_RegionDir(address));
+      }
+    }
+  }
+
+  action(ai_ackInvalidate, "ai", desc="Ack to let the reg-dir know that the inv is ordered") {
+    peek(regBuf_in, CPURequestMsg) {
+      enqueue(regAckNetwork_out, UnblockMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        DPRINTF(RubySlicc, "ai out_msg: %s\n", out_msg);
+      }
+    }
+  }
+
+  action(aic_ackInvalidate, "aic", desc="Ack to let the reg-dir know that the inv is ordered") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (in_msg.NoAckNeeded == false) {
+        enqueue(regAckNetwork_out, UnblockMsg, 1) {
+          out_msg.addr := address;
+          if (machineIDToMachineType(in_msg.Sender) == MachineType:CorePair) {
+            out_msg.Destination.add(createMachineID(MachineType:RegionBuffer, intToID(0)));
+          } else {
+            out_msg.Destination.add(createMachineID(MachineType:RegionBuffer, intToID(1)));
+          }
+          out_msg.MessageSize := MessageSizeType:Response_Control;
+          DPRINTF(RubySlicc, "ai out_msg: %s\n", out_msg);
+          out_msg.wasValid := in_msg.isValid;
+        }
+      }
+    }
+  }
+
+  action(al_allocateL3Block, "al", desc="allocate the L3 block on WB") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+        entry.DataBlk := in_msg.DataBlk;
+        entry.LastSender := in_msg.Sender;
+      } else {
+        if (L3CacheMemory.cacheAvail(address) == false) {
+          Addr victim := L3CacheMemory.cacheProbe(address);
+          CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+                                                 L3CacheMemory.lookup(victim));
+          queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+                           victim_entry.DataBlk);
+          L3CacheMemory.deallocate(victim);
+        }
+        assert(L3CacheMemory.cacheAvail(address));
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+        entry.DataBlk := in_msg.DataBlk;
+        entry.LastSender := in_msg.Sender;
+      }
+    }
+  }
+
+  action(alwt_allocateL3BlockOnWT, "alwt", desc="allocate the L3 block on WT") {
+    if ((tbe.wtData || tbe.atomicData) && useL3OnWT) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+        entry.DataBlk := tbe.DataBlk;
+        entry.LastSender := tbe.LastSender;
+      } else {
+        if (L3CacheMemory.cacheAvail(address) == false) {
+          Addr victim := L3CacheMemory.cacheProbe(address);
+          CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+                                                 L3CacheMemory.lookup(victim));
+          queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+                           victim_entry.DataBlk);
+          L3CacheMemory.deallocate(victim);
+        }
+        assert(L3CacheMemory.cacheAvail(address));
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+        entry.DataBlk := tbe.DataBlk;
+        entry.LastSender := tbe.LastSender;
+      }
+    }
+  }
+
+  action(ali_allocateL3Block, "ali", desc="allocate the L3 block on ForceInv") {
+    if (tbe.Dirty == true) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+        entry.DataBlk := tbe.DataBlk;
+        entry.LastSender := tbe.LastSender;
+      } else {
+        if (L3CacheMemory.cacheAvail(address) == false) {
+          Addr victim := L3CacheMemory.cacheProbe(address);
+          CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+                                                 L3CacheMemory.lookup(victim));
+          queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+                           victim_entry.DataBlk);
+          L3CacheMemory.deallocate(victim);
+        }
+        assert(L3CacheMemory.cacheAvail(address));
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+        entry.DataBlk := tbe.DataBlk;
+        entry.LastSender := tbe.LastSender;
+      }
+    }
+  }
+
+  action(ali_allocateL3BlockNoTBE, "alt", desc="allocate the L3 block on ForceInv no TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (in_msg.Dirty) {
+        if (L3CacheMemory.isTagPresent(address)) {
+          CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+          APPEND_TRANSITION_COMMENT(" ali wrote data to L3 (hit) ");
+          entry.DataBlk := in_msg.DataBlk;
+          entry.LastSender := in_msg.Sender;
+        } else {
+          if (L3CacheMemory.cacheAvail(address) == false) {
+            Addr victim := L3CacheMemory.cacheProbe(address);
+            CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+                                                   L3CacheMemory.lookup(victim));
+            queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+                             victim_entry.DataBlk);
+            L3CacheMemory.deallocate(victim);
+          }
+          assert(L3CacheMemory.cacheAvail(address));
+          CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+          APPEND_TRANSITION_COMMENT(" ali wrote data to L3 ");
+          entry.DataBlk := in_msg.DataBlk;
+          entry.LastSender := in_msg.Sender;
+        }
+      }
+    }
+  }
+
+  action(dl_deallocateL3, "dl", desc="deallocate the L3 block") {
+    L3CacheMemory.deallocate(address);
+  }
+
+  action(p_popRequestQueue, "p", desc="pop request queue") {
+    requestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(prd_popRegionQueue, "prd", desc="pop request queue") {
+    regDir_in.dequeue(clockEdge());
+  }
+
+  action(prb_popRegionBufQueue, "prb", desc="pop request queue") {
+    regBuf_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="pop response queue") {
+    responseNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pm_popMemQueue, "pm", desc="pop mem queue") {
+    memQueue_in.dequeue(clockEdge());
+  }
+
+  action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(ptl_popTriggerQueue, "ptl", desc="pop L3 trigger queue") {
+    L3TriggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(pu_popUnblockQueue, "pu", desc="pop unblock queue") {
+    unblockNetwork_in.dequeue(clockEdge());
+  }
+
+  action(yy_recycleResponseQueue, "yy", desc="recycle response queue") {
+    responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(ww_stallAndWaitRegRequestQueue, "ww", desc="recycle region dir request queue") {
+    stall_and_wait(regDir_in, address);
+  }
+
+  action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") {
+    stall_and_wait(requestNetwork_in, address);
+  }
+
+  action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") {
+    wakeUpBuffers(address);
+  }
+
+  action(wa_wakeUpAllDependents, "waa", desc="Wake up any requests waiting for this region") {
+    wakeUpAllBuffers();
+  }
+
+  action(z_stall, "z", desc="...") {
+  }
+
+  // TRANSITIONS
+
+  // transitions from U
+
+  transition({BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {Inv, Downgrade}) {
+      ww_stallAndWaitRegRequestQueue;
+  }
+
+  transition(U, Inv, BI){L3TagArrayRead} {
+    tr_allocateTBE;
+    dcr_probeInvCoreData; // only need to invalidate sharers
+    ai_ackInvalidate;
+    prb_popRegionBufQueue;
+  }
+
+  transition(U, Downgrade, BI){L3TagArrayRead} {
+    tr_allocateTBE;
+    ddr_probeDownCoreData; // only need to invalidate sharers
+    ai_ackInvalidate;
+    prb_popRegionBufQueue;
+  }
+
+  // The next 2 transistions are needed in the event that an invalidation
+  // is waiting for its ack from the core, but the event makes it through
+  // the region directory before the acks. This wouldn't be needed if
+  // we waited to ack the region dir until the directory got all the acks
+  transition({BR, BW, BI, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, WriteThrough, Atomic}) {
+      ww_stallAndWaitRegRequestQueue;
+  }
+
+  transition({BR, BW, BI, BL, BS_M, BM_M, B_M, BS_PM, BM_PM, B_PM, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {RdBlkSP, RdBlkMP, RdBlkP}) {
+      st_stallAndWaitRequest;
+  }
+
+  transition({BR, BW, BI, BL, BS_M, BM_M, B_M, BS_PM, BM_PM, B_PM, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {WriteThroughP,AtomicP}) {
+      st_stallAndWaitRequest;
+  }
+
+  transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead} {
+    t_allocateTBE;
+    l_queueMemRdReq;
+    sa_setAcks;
+    o_checkForCompletion;
+    ra_ackRegionDir;
+    prd_popRegionQueue;
+  }
+
+  transition(U, WriteThrough, BM_PM){L3TagArrayRead} {
+    t_allocateTBE;
+    w_sendResponseWBAck;
+    l_queueMemRdReq;
+    sa_setAcks;
+    o_checkForCompletion;
+    ra_ackRegionDir;
+    prd_popRegionQueue;
+  }
+
+  transition(U, {RdBlkM,Atomic}, BM_PM){L3TagArrayRead} {
+    t_allocateTBE;
+    l_queueMemRdReq;
+    sa_setAcks;
+    o_checkForCompletion;
+    ra_ackRegionDir;
+    prd_popRegionQueue;
+  }
+
+  transition(U, RdBlk, B_PM){L3TagArrayRead} {
+    t_allocateTBE;
+    l_queueMemRdReq;
+    sa_setAcks;
+    o_checkForCompletion;
+    ra_ackRegionDir;
+    prd_popRegionQueue;
+  }
+
+  transition(U, {RdBlkSP}, BS_M) {L3TagArrayRead} {
+    tp_allocateTBEP;
+    lrp_queueMemRdReqP;
+    p_popRequestQueue;
+  }
+
+  transition(U, WriteThroughP, BM_M) {L3TagArrayRead} {
+    tp_allocateTBEP;
+    wp_sendResponseWBAckP;
+    lrp_queueMemRdReqP;
+    p_popRequestQueue;
+  }
+
+  transition(U, {RdBlkMP,AtomicP}, BM_M) {L3TagArrayRead} {
+    tp_allocateTBEP;
+    lrp_queueMemRdReqP;
+    p_popRequestQueue;
+  }
+
+  transition(U, RdBlkP, B_M) {L3TagArrayRead} {
+    tp_allocateTBEP;
+    lrp_queueMemRdReqP;
+    p_popRequestQueue;
+  }
+
+  transition(U, VicDirtyP, BL) {L3TagArrayRead}  {
+    tp_allocateTBEP;
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition(U, VicCleanP, BL) {L3TagArrayRead} {
+    tp_allocateTBEP;
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition(BM_Pm, RdBlkSP, BM_Pm_B) {L3DataArrayWrite} {
+    sb_sendResponseSBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BS_Pm, RdBlkSP, BS_Pm_B) {L3DataArrayWrite} {
+    sb_sendResponseSBypass;
+    p_popRequestQueue;
+  }
+
+  transition(B_Pm, RdBlkSP, B_Pm_B) {L3DataArrayWrite} {
+    sb_sendResponseSBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BP, RdBlkSP, BP_B) {L3DataArrayWrite} {
+    sb_sendResponseSBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BM_Pm, RdBlkMP, BM_Pm_B) {L3DataArrayWrite} {
+    mb_sendResponseMBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BS_Pm, RdBlkMP, BS_Pm_B) {L3DataArrayWrite} {
+    mb_sendResponseMBypass;
+    p_popRequestQueue;
+  }
+
+  transition(B_Pm, RdBlkMP, B_Pm_B) {L3DataArrayWrite} {
+    mb_sendResponseMBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BP, RdBlkMP, BP_B) {L3DataArrayWrite} {
+    mb_sendResponseMBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BM_Pm, {WriteThroughP,AtomicP}, BM_Pm_B) {L3DataArrayWrite} {
+    wdp_writeBackDataPrivate;
+    mbwt_sendResponseWriteThroughBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BS_Pm, {WriteThroughP,AtomicP}, BS_Pm_B) {L3DataArrayWrite} {
+    wdp_writeBackDataPrivate;
+    mbwt_sendResponseWriteThroughBypass;
+    p_popRequestQueue;
+  }
+
+  transition(B_Pm, {WriteThroughP,AtomicP}, B_Pm_B) {L3DataArrayWrite} {
+    wdp_writeBackDataPrivate;
+    mbwt_sendResponseWriteThroughBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BP, {WriteThroughP,AtomicP}, BP_B) {L3DataArrayWrite} {
+    wdp_writeBackDataPrivate;
+    mbwt_sendResponseWriteThroughBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BM_Pm, RdBlkP, BM_Pm_B) {L3DataArrayWrite} {
+    esb_sendResponseESBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BS_Pm, RdBlkP, BS_Pm_B) {L3DataArrayWrite} {
+    esb_sendResponseESBypass;
+    p_popRequestQueue;
+  }
+
+  transition(B_Pm, RdBlkP, B_Pm_B)  {L3DataArrayWrite}{
+    esb_sendResponseESBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BP, RdBlkP, BP_B)  {L3DataArrayWrite}{
+    esb_sendResponseESBypass;
+    p_popRequestQueue;
+  }
+
+  transition(BM_Pm_B, CoreUnblock, BM_Pm) {
+    wa_wakeUpDependents;
+    pu_popUnblockQueue;
+  }
+
+  transition(BS_Pm_B, CoreUnblock, BS_Pm) {
+    wa_wakeUpDependents;
+    pu_popUnblockQueue;
+  }
+
+  transition(B_Pm_B, CoreUnblock, B_Pm) {
+    wa_wakeUpDependents;
+    pu_popUnblockQueue;
+  }
+
+  transition(BP_B, CoreUnblock, BP) {
+    wa_wakeUpDependents;
+    pu_popUnblockQueue;
+  }
+
+  transition(BM_Pm_B, UnblockWriteThrough, BM_Pm) {
+    wa_wakeUpDependents;
+    pt_popTriggerQueue;
+  }
+
+  transition(BS_Pm_B, UnblockWriteThrough, BS_Pm) {
+    wa_wakeUpDependents;
+    pt_popTriggerQueue;
+  }
+
+  transition(B_Pm_B, UnblockWriteThrough, B_Pm) {
+    wa_wakeUpDependents;
+    pt_popTriggerQueue;
+  }
+
+  transition(BP_B, UnblockWriteThrough, BP) {
+    wa_wakeUpDependents;
+    pt_popTriggerQueue;
+  }
+
+  transition(BM_Pm, VicDirtyP, BM_Pm_BL) {
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition(BS_Pm, VicDirtyP, BS_Pm_BL) {
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition(B_Pm, VicDirtyP, B_Pm_BL) {
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition(BP, VicDirtyP, BP_BL) {
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition(BM_Pm, VicCleanP, BM_Pm_BL) {
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition(BS_Pm, VicCleanP, BS_Pm_BL) {
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition(B_Pm, VicCleanP, B_Pm_BL) {
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition(BP, VicCleanP, BP_BL) {
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition(BM_Pm_BL, CPUData, BM_Pm) {
+    yc_writeCPUDataToTBE;
+    d_writeDataToMemory;
+    wa_wakeUpDependents;
+    pr_popResponseQueue;
+  }
+
+  transition(BS_Pm_BL, CPUData, BS_Pm) {
+    yc_writeCPUDataToTBE;
+    d_writeDataToMemory;
+    wa_wakeUpDependents;
+    pr_popResponseQueue;
+  }
+
+  transition(B_Pm_BL, CPUData, B_Pm) {
+    yc_writeCPUDataToTBE;
+    d_writeDataToMemory;
+    wa_wakeUpDependents;
+    pr_popResponseQueue;
+  }
+
+  transition(BP_BL, CPUData, BP) {
+    yc_writeCPUDataToTBE;
+    d_writeDataToMemory;
+    wa_wakeUpDependents;
+    pr_popResponseQueue;
+  }
+
+  transition({BR, BW, BL}, {VicDirtyP, VicCleanP}) {
+      st_stallAndWaitRequest;
+  }
+
+  transition({BR, BW, BL}, {VicDirty, VicClean}) {
+      ww_stallAndWaitRegRequestQueue;
+  }
+
+  transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} {
+    dt_deallocateTBE;
+    d_writeDataToMemory;
+    al_allocateL3Block;
+    wa_wakeUpDependents;
+    pr_popResponseQueue;
+  }
+
+  transition(BL, StaleWB, U) {L3TagArrayWrite} {
+    dt_deallocateTBE;
+    wa_wakeUpAllDependents;
+    pr_popResponseQueue;
+  }
+
+  transition({BI, B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {VicDirty, VicClean}) {
+    ww_stallAndWaitRegRequestQueue;
+  }
+
+  transition({BI, B, BS_M, BM_M, B_M, BS_PM, BM_PM, B_PM, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {VicDirtyP, VicCleanP}) {
+      st_stallAndWaitRequest;
+  }
+
+  transition({U, BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, WBAck) {
+    pm_popMemQueue;
+  }
+
+  transition({U, BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, StaleVicDirtyP) {
+    rvp_removeVicDirtyIgnore;
+    wp_sendResponseWBAckP;
+    p_popRequestQueue;
+  }
+
+  transition({U, BR, BW, BL, BI, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B, BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, StaleVicDirty) {
+    rv_removeVicDirtyIgnore;
+    w_sendResponseWBAck;
+    prd_popRegionQueue;
+  }
+
+  transition(U, VicDirty, BL) {L3TagArrayRead} {
+      t_allocateTBE;
+      ra_ackRegionDir;
+      w_sendResponseWBAck;
+      prd_popRegionQueue;
+  }
+
+  transition(U, VicClean, BL) {L3TagArrayRead} {
+      t_allocateTBE;
+      ra_ackRegionDir;
+      w_sendResponseWBAck;
+      prd_popRegionQueue;
+  }
+
+  transition({B, BR}, CoreUnblock, U) {
+    wa_wakeUpDependents;
+    pu_popUnblockQueue;
+  }
+
+  transition({B, BR}, UnblockWriteThrough, U) {
+    wa_wakeUpDependents;
+    pt_popTriggerQueue;
+  }
+
+  transition(BS_M, MemData, B) {L3TagArrayWrite, L3DataArrayWrite} {
+    mt_writeMemDataToTBE;
+    s_sendResponseS;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pm_popMemQueue;
+  }
+
+  transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+    mt_writeMemDataToTBE;
+    m_sendResponseM;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pm_popMemQueue;
+  }
+
+  transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+    mt_writeMemDataToTBE;
+    es_sendResponseES;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pm_popMemQueue;
+  }
+
+  transition(BS_PM, MemData, BS_Pm) {} {
+    mt_writeMemDataToTBE;
+    wa_wakeUpDependents;
+    pm_popMemQueue;
+  }
+
+  transition(BM_PM, MemData, BM_Pm){} {
+    mt_writeMemDataToTBE;
+    wa_wakeUpDependents;
+    pm_popMemQueue;
+  }
+
+  transition(B_PM, MemData, B_Pm){} {
+    mt_writeMemDataToTBE;
+    wa_wakeUpDependents;
+    pm_popMemQueue;
+  }
+
+  transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+    s_sendResponseS;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    ptl_popTriggerQueue;
+  }
+
+  transition(BM_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+    m_sendResponseM;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    ptl_popTriggerQueue;
+  }
+
+  transition(B_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+    es_sendResponseES;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    ptl_popTriggerQueue;
+  }
+
+  transition(BS_PM, L3Hit, BS_Pm) {
+    wa_wakeUpDependents;
+    ptl_popTriggerQueue;
+  }
+
+  transition(BM_PM, L3Hit, BM_Pm) {
+    wa_wakeUpDependents;
+    ptl_popTriggerQueue;
+  }
+
+  transition(B_PM, L3Hit, B_Pm) {
+    wa_wakeUpDependents;
+    ptl_popTriggerQueue;
+  }
+
+  transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BP, BI}, CPUPrbResp) {
+    aic_ackInvalidate;
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    ont_checkForCompletionNoTrigger;
+    pr_popResponseQueue;
+  }
+
+  transition({B, B_M, BS_M, BM_M}, {CPUPrbResp, LastCPUPrbResp}) {
+    z_stall;
+  }
+
+  transition({BS_Pm_BL, BM_Pm_BL, B_Pm_BL, BP_BL, BS_Pm_B, BM_Pm_B, B_Pm_B, BP_B}, {CPUPrbResp, LastCPUPrbResp}) {
+    // recycling because PrbResponse and data come on the same network
+    yy_recycleResponseQueue;
+  }
+
+  transition(U, {CPUPrbResp, LastCPUPrbResp}) {L3TagArrayRead, L3DataArrayWrite} {
+    aic_ackInvalidate;
+    wdt_writeBackDataInvNoTBE;
+    ali_allocateL3BlockNoTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(BL, {CPUPrbResp, LastCPUPrbResp}) {} {
+    aic_ackInvalidate;
+    y_writeProbeDataToTBE;
+    wdi_writeBackDataInv;
+    ali_allocateL3Block;
+    pr_popResponseQueue;
+  }
+
+  transition(BS_PM, LastCPUPrbResp, BS_M) {
+    aic_ackInvalidate;
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    ont_checkForCompletionNoTrigger;
+    pr_popResponseQueue;
+  }
+
+  transition(BS_PM, ProbeAcksComplete, BS_M) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(BM_PM, LastCPUPrbResp, BM_M) {
+    aic_ackInvalidate;
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    ont_checkForCompletionNoTrigger;
+    pr_popResponseQueue;
+  }
+
+  transition(BM_PM, ProbeAcksComplete, BM_M) {} {
+    pt_popTriggerQueue;
+  }
+
+  transition(B_PM, LastCPUPrbResp, B_M) {
+    aic_ackInvalidate;
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    ont_checkForCompletionNoTrigger;
+    pr_popResponseQueue;
+  }
+
+  transition(B_PM, ProbeAcksComplete, B_M){} {
+    pt_popTriggerQueue;
+  }
+
+  transition(BS_Pm, LastCPUPrbResp, B) {
+    aic_ackInvalidate;
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    ont_checkForCompletionNoTrigger;
+    s_sendResponseS;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    ali_allocateL3Block;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+    s_sendResponseS;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    ali_allocateL3Block;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(BM_Pm, LastCPUPrbResp, B) {
+    aic_ackInvalidate;
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    ont_checkForCompletionNoTrigger;
+    m_sendResponseM;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    ali_allocateL3Block;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+    m_sendResponseM;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    ali_allocateL3Block;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(B_Pm, LastCPUPrbResp, B) {
+    aic_ackInvalidate;
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    ont_checkForCompletionNoTrigger;
+    es_sendResponseES;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    ali_allocateL3Block;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+    es_sendResponseES;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    ali_allocateL3Block;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(BP, LastCPUPrbResp, B) {
+    aic_ackInvalidate;
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    ont_checkForCompletionNoTrigger;
+    c_sendResponseCtoD;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(BP, ProbeAcksComplete, B){L3TagArrayWrite, L3TagArrayWrite} {
+    c_sendResponseCtoD;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(BI, LastCPUPrbResp, B) {
+    aic_ackInvalidate;
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    ont_checkForCompletionNoTrigger;
+    wa_wakeUpDependents;
+    wdi_writeBackDataInv;
+    ali_allocateL3Block;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(BI, ProbeAcksComplete, U) {L3TagArrayWrite, L3DataArrayWrite}{
+    wa_wakeUpDependents;
+    wdi_writeBackDataInv;
+    ali_allocateL3Block;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-msg.sm b/src/mem/protocol/MOESI_AMD_Base-Region-msg.sm
new file mode 100644
index 000000000..823933e57
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-Region-msg.sm
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+enumeration(CoherenceRequestType, desc="Coherence Request Types") {
+  // CPU Request Types ONLY
+  RdBlk,        desc="Read Blk";
+  RdBlkM,       desc="Read Blk Modified";
+  RdBlkS,       desc="Read Blk Shared";
+  VicClean,     desc="L2 clean eviction";
+  VicDirty,     desc="L2 dirty eviction";
+
+  WrCancel,     desc="want to cancel WB to Memory"; // should this be here?
+
+  WBApproval,   desc="WB Approval";
+
+  // Messages between Dir and R-Dir
+  ForceInv,     desc="Send invalide to the block";
+  ForceDowngrade, desc="Send downgrade to the block";
+  Unblock,      desc="Used to let the dir know a message has been sunk";
+
+  // Messages between R-Dir and R-Buffer
+  PrivateNotify, desc="Let region buffer know it has private access";
+  SharedNotify,  desc="Let region buffer know it has shared access";
+  WbNotify,      desc="Let region buffer know it saw its wb request";
+  Downgrade,     desc="Force the region buffer to downgrade to shared";
+  // Response to R-Dir (probably should be on a different network, but
+  // I need it to be ordered with respect to requests)
+  InvAck,       desc="Let the R-Dir know when the inv has occured";
+
+  PrivateRequest, desc="R-buf wants the region in private";
+  UpgradeRequest, desc="R-buf wants the region in private";
+  SharedRequest,  desc="R-buf wants the region in shared (could respond with private)";
+  CleanWbRequest, desc="R-buf wants to deallocate clean region";
+
+  NA,             desc="So we don't get segfaults";
+}
+
+enumeration(ProbeRequestType, desc="Probe Request Types") {
+  PrbDowngrade,    desc="Probe for Status";  // EtoS, MtoO, StoS
+  PrbInv,       desc="Probe to Invalidate";
+
+  // For regions
+  PrbRepl,      desc="Force the cache to do a replacement";
+  PrbRegDowngrade, desc="Probe for Status";  // EtoS, MtoO, StoS
+}
+
+
+enumeration(CoherenceResponseType, desc="Coherence Response Types") {
+  NBSysResp,       desc="Northbridge response to CPU Rd request";
+  NBSysWBAck,      desc="Northbridge response ok to WB";
+  TDSysResp,       desc="TCCdirectory response to CPU Rd request";
+  TDSysWBAck,      desc="TCCdirectory response ok to WB";
+  TDSysWBNack,      desc="TCCdirectory response ok to drop";
+  CPUPrbResp,      desc="CPU Probe Response";
+  CPUData,         desc="CPU Data";
+  StaleNotif,      desc="Notification of Stale WBAck, No data to writeback";
+  CPUCancelWB,     desc="want to cancel WB to Memory";
+  MemData,         desc="Data from Memory";
+
+  // for regions
+  PrivateAck,      desc="Ack that r-buf received private notify";
+  RegionWbAck, desc="Writeback Ack that r-buf completed deallocation";
+  DirReadyAck, desc="Directory (mem ctrl)<->region dir handshake";
+}
+
+enumeration(CoherenceState, default="CoherenceState_NA", desc="Coherence State") {
+  Modified,             desc="Modified";
+  Owned,                desc="Owned state";
+  Exclusive,            desc="Exclusive";
+  Shared,               desc="Shared";
+  NA,                   desc="NA";
+}
+
+structure(CPURequestMsg, desc="...", interface="Message") {
+  Addr addr,             desc="Physical address for this request";
+  Addr DemandAddress,       desc="Physical block address for this request";
+  CoherenceRequestType Type,   desc="Type of request";
+  DataBlock DataBlk,           desc="data for the cache line";  // only for WB
+  bool Dirty,                   desc="whether WB data is dirty";  // only for WB
+  MachineID Requestor,            desc="Node who initiated the request";
+  NetDest Destination,             desc="Multicast destination mask";
+  bool Shared,                  desc="For CPU_WrVicBlk, vic is O not M.  For CPU_ClVicBlk, vic is S";
+  MessageSizeType MessageSize, desc="size category of the message";
+  Cycles InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
+  Cycles ForwardRequestTime, default="0", desc="time the dir forwarded the request";
+  Cycles ProbeRequestStartTime, default="0", desc="the time the dir started the probe request";
+  bool DemandRequest, default="false", desc="For profiling purposes";
+
+  NetDest Sharers,              desc="Caches that may have a valid copy of the data";
+  bool ForceShared,             desc="R-dir knows it is shared, pass on so it sends an S copy, not E";
+  bool Private, default="false", desc="Requestor already has private permissions, no need for dir check";
+  bool CtoDSinked, default="false", desc="This is true if the CtoD previously sent must have been sunk";
+
+  bool NoAckNeeded, default="false", desc="True if region buffer doesn't need to ack";
+  int Acks, default="0", desc="Acks that the dir (mem ctrl) should expect to receive";
+  CoherenceRequestType OriginalType, default="CoherenceRequestType_NA",  desc="Type of request from core fwded through region buffer";
+
+  bool functionalRead(Packet *pkt) {
+    // Only PUTX messages contains the data block
+    if (Type == CoherenceRequestType:VicDirty) {
+        return testAndRead(addr, DataBlk, pkt);
+    }
+
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return testAndWrite(addr, DataBlk, pkt);
+  }
+}
+
+structure(NBProbeRequestMsg, desc="...", interface="Message") {
+  Addr addr,              desc="Physical address for this request";
+  ProbeRequestType Type,             desc="probe signal";
+  bool ReturnData,              desc="Indicates CPU should return data";
+  NetDest Destination,             desc="Node to whom the data is sent";
+  MessageSizeType MessageSize, desc="size category of the message";
+  bool DemandRequest, default="false", desc="demand request, requesting 3-hop transfer";
+  Addr DemandAddress,        desc="Demand block address for a region request";
+  MachineID Requestor,          desc="Requestor id for 3-hop requests";
+  bool NoAckNeeded, default="false", desc="For short circuting acks";
+
+  bool functionalRead(Packet *pkt) {
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return false;
+  }
+
+}
+
+structure(TDProbeRequestMsg, desc="...", interface="Message") {
+  Addr addr,              desc="Physical address for this request";
+  ProbeRequestType Type,             desc="TD_PrbNxtState signal";
+  bool ReturnData,              desc="Indicates CPU should return data";
+  bool localCtoD,              desc="Indicates CtoD is within the GPU hierarchy (aka TCC subtree)";
+  NetDest Destination,             desc="Node to whom the data is sent";
+  MessageSizeType MessageSize, desc="size category of the message";
+  MachineID Sender,               desc="Node who sent the data";
+  bool currentOwner, default="false", desc="Is the sender the current owner";
+  bool DoneAck, default="false", desc="Is this a done ack?";
+  bool Dirty, default="false", desc="Was block dirty when evicted";
+  bool wasValid, default="false", desc="Was block valid when evicted";
+  bool valid, default="false", desc="Is block valid";
+  bool validToInvalid, default="false", desc="Was block valid when evicted";
+
+  bool functionalRead(Packet *pkt) {
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return false;
+  }
+}
+
+// Response Messages seemed to be easily munged into one type
+structure(ResponseMsg, desc="...", interface="Message") {
+  Addr addr,             desc="Physical address for this request";
+  CoherenceResponseType Type,  desc="NB Sys Resp or CPU Response to Probe";
+  MachineID Sender,               desc="Node who sent the data";
+  NetDest Destination,             desc="Node to whom the data is sent";
+  // Begin Used Only By CPU Response
+  DataBlock DataBlk,           desc="data for the cache line";
+  bool Hit,                    desc="probe hit valid line";
+  bool Shared,                 desc="True if S, or if NB Probe ReturnData==1 && O";
+  bool Dirty,                  desc="Is the data dirty (different than memory)?";
+  bool Ntsl,                   desc="indicates probed lin will be invalid after probe";
+  bool UntransferredOwner,     desc="pending confirmation of ownership change";
+  // End Used Only By CPU Response
+
+  // Begin NB Response Only
+  CoherenceState State, default=CoherenceState_NA,        desc="What returned data from NB should be in";
+  bool CtoD,                    desc="was the originator a CtoD?";
+  // End NB Response Only
+
+  bool NbReqShared,             desc="modification of Shared field from initial request, e.g. hit by shared probe";
+
+  MessageSizeType MessageSize, desc="size category of the message";
+  Cycles InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
+  Cycles ForwardRequestTime, default="0", desc="time the dir forwarded the request";
+  Cycles ProbeRequestStartTime, default="0", desc="the time the dir started the probe request";
+  bool DemandRequest, default="false", desc="For profiling purposes";
+
+  bool L3Hit, default="false", desc="Did memory or L3 supply the data?";
+  MachineID OriginalResponder, desc="Mach which wrote the data to the L3";
+
+  bool NotCached, default="false", desc="True when the Region buffer has already evicted the line";
+
+  bool NoAckNeeded, default="false", desc="For short circuting acks";
+  bool isValid, default="false", desc="Is acked block valid";
+
+  bool functionalRead(Packet *pkt) {
+    // Only PUTX messages contains the data block
+    if (Type == CoherenceResponseType:CPUData ||
+        Type == CoherenceResponseType:MemData) {
+        return testAndRead(addr, DataBlk, pkt);
+    }
+
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return testAndWrite(addr, DataBlk, pkt);
+  }
+}
+
+structure(UnblockMsg, desc="...", interface="Message") {
+  Addr addr,              desc="Physical address for this request";
+  NetDest Destination,          desc="Destination (always directory)";
+  MessageSizeType MessageSize, desc="size category of the message";
+}
+
+enumeration(TriggerType, desc="Trigger Type") {
+  L2_to_L1,             desc="L2 to L1 fill";
+  AcksComplete,         desc="NB received all needed Acks";
+
+  // For regions
+  InvNext,              desc="Invalidate the next block";
+  PrivateAck,           desc="Loopback ack for machines with no Region Buffer";
+  AllOutstanding,       desc="All outstanding requests have finished";
+  L3Hit,                desc="L3 hit in dir";
+
+  // For region directory once the directory is blocked
+  InvRegion,            desc="Invalidate region";
+  DowngradeRegion,      desc="downgrade region";
+}
+
+enumeration(CacheId, desc="Which Cache in the Core") {
+  L1I,          desc="L1 I-cache";
+  L1D0,         desc="L1 D-cache cluster 0";
+  L1D1,         desc="L1 D-cache cluster 1";
+  NA,           desc="Default";
+}
+
+structure(TriggerMsg, desc="...", interface="Message") {
+  Addr addr,              desc="Address";
+  TriggerType Type,             desc="Type of trigger";
+  CacheId Dest,         default="CacheId_NA", desc="Cache to invalidate";
+
+  bool functionalRead(Packet *pkt) {
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return false;
+  }
+
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm b/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm
new file mode 100644
index 000000000..89f7d6fcb
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm
@@ -0,0 +1,1368 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Jason Power
+ */
+
+machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol")
+: CacheMemory *cacheMemory; // stores only region addresses. Must set block size same as below
+  bool isOnCPU;
+  int blocksPerRegion := 64; // 4k regions
+  Cycles toDirLatency := 5;     // Latency to fwd requests to directory
+  Cycles toRegionDirLatency := 5; // Latency for requests and acks to directory
+  Cycles nextEvictLatency := 1;   // latency added between each block while evicting region
+  bool noTCCdir := "False";
+  int TCC_select_num_bits := 1;
+
+  // From the Cores
+  MessageBuffer * requestFromCore, network="From", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseFromCore, network="From", virtual_network="2", vnet_type="response";
+
+  // Requests to the cores or directory
+  MessageBuffer * requestToNetwork, network="To", virtual_network="0", vnet_type="request";
+
+  // From Region-Dir
+  MessageBuffer * notifyFromRegionDir, network="From", virtual_network="7", vnet_type="request";
+  MessageBuffer * probeFromRegionDir, network="From", virtual_network="8", vnet_type="request";
+
+  // From the directory
+  MessageBuffer * unblockFromDir, network="From", virtual_network="4", vnet_type="unblock";
+
+  // To the region-Dir
+  MessageBuffer * responseToRegDir, network="To", virtual_network="2", vnet_type="response";
+
+  MessageBuffer * triggerQueue;
+{
+
+  // States
+  state_declaration(State, desc="Region states", default="RegionBuffer_State_NP") {
+    NP, AccessPermission:Invalid,       desc="Not present in region directory";
+    P,  AccessPermission:Invalid,       desc="Region is private to the cache";
+    S,  AccessPermission:Invalid,       desc="Region is possibly shared with others";
+
+    NP_PS, AccessPermission:Invalid,    desc="Intermediate state waiting for notify from r-dir";
+    S_P,  AccessPermission:Invalid,     desc="Intermediate state while upgrading region";
+
+    P_NP, AccessPermission:Invalid,     desc="Intermediate state while evicting all lines in region";
+    P_S,  AccessPermission:Invalid,     desc="Intermediate state while downgrading all lines in region";
+
+    S_NP_PS, AccessPermission:Invalid,  desc="Got an inv in S_P, waiting for all inv acks, then going to since the write is already out there NP_PS";
+    P_NP_NP,  AccessPermission:Invalid,    desc="Evicting region on repl, then got an inv. Need to re-evict";
+
+    P_NP_O, AccessPermission:Invalid,     desc="Waiting for all outstanding requests";
+    P_S_O,  AccessPermission:Invalid,     desc="Waiting for all outstanding requests";
+    S_O,  AccessPermission:Invalid,       desc="Waiting for all outstanding requests";
+    S_NP_PS_O, AccessPermission:Invalid,  desc="Waiting for all outstanding requests";
+
+    SS_P, AccessPermission:Invalid,  desc="Waiting for CPU write that we know is there";
+
+    P_NP_W, AccessPermission:Invalid,     desc="Waiting for writeback ack";
+
+    NP_W, AccessPermission:Invalid,     desc="Got a done ack before request, waiting for that victim";
+  }
+
+  enumeration(Event, desc="Region directory events") {
+    CPURead,        desc="Access from CPU core";
+    CPUWrite,       desc="Access from CPU core";
+    CPUWriteback,       desc="Writeback request from CPU core";
+
+    ReplRegion,     desc="Start a replace on a region";
+
+    PrivateNotify,  desc="Update entry to private state";
+    SharedNotify,   desc="Update entry to shared state";
+    WbNotify,       desc="Writeback notification received";
+    InvRegion,      desc="Start invalidating a region";
+    DowngradeRegion,desc="Start invalidating a region";
+
+    InvAck,         desc="Ack from core";
+
+    DoneAck,        desc="Ack from core that request has finished";
+    AllOutstanding, desc="All outstanding requests have now finished";
+
+    Evict,          desc="Loopback to evict each block";
+    LastAck_PrbResp, desc="Done eviciting all the blocks, got the last ack from core, now respond to region dir";
+    LastAck_CleanWb, desc="Done eviciting all the blocks, got the last ack from core, now start clean writeback (note the dir has already been updated)";
+
+    StallAccess,    desc="Wait for the done ack on the address before proceeding";
+    StallDoneAck,   desc="Wait for the access on the address before proceeding";
+
+    StaleRequest,   desc="Got a stale victim from the cache, fwd it without incrementing outstanding";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+  }
+
+  structure(BoolVec, external="yes") {
+    bool at(int);
+    void resize(int);
+    void clear();
+    int size();
+  }
+
+  structure(Entry, desc="Region entry", interface="AbstractCacheEntry") {
+    Addr addr,        desc="Base address of this region";
+    State RegionState,      desc="Region state";
+    DataBlock DataBlk,      desc="Data for the block (always empty in region buffer)";
+    BoolVec ValidBlocks,    desc="A vector to keep track of valid blocks";
+    int NumValidBlocks,     desc="Number of trues in ValidBlocks to avoid iterating";
+    BoolVec UsedBlocks,     desc="A vector to keep track of blocks ever valid";
+    bool dirty,           desc="Dirty as best known by the region buffer";
+    // This is needed so we don't ack an invalidate until all requests are ordered
+    int NumOutstandingReqs,    desc="Total outstanding private/shared requests";
+    BoolVec OutstandingReqs,   desc="Blocks that have outstanding private/shared requests";
+    bool MustDowngrade,     desc="Set when we got a downgrade before the shd or pvt permissions";
+    Cycles ProbeRequestTime, default="Cycles(0)", desc="Time region dir started the probe";
+    Cycles InitialRequestTime, default="Cycles(0)", desc="Time message was sent to region dir";
+    bool MsgSentToDir,      desc="True if the current request required a message to the dir";
+    bool clearOnDone, default="false", desc="clear valid bit when request completes";
+    Addr clearOnDoneAddr, desc="clear valid bit when request completes";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,         desc="Transient state";
+    //int NumValidBlocks,     desc="Number of blocks valid so we don't have to count a BoolVec";
+    BoolVec ValidBlocks,    desc="A vector to keep track of valid blocks";
+    bool AllAcksReceived,   desc="Got all necessary acks from dir";
+    bool DoneEvicting,      desc="Done iterating through blocks checking for valids";
+    BoolVec AcksReceived,   desc="Received acks for theses blocks\n";
+    bool SendAck,           desc="If true, send an ack to the r-dir at end of inv";
+    ProbeRequestType MsgType, desc="Type of message to send while 'evicting' ";
+    int NumOutstandingReqs,    desc="Total outstanding private/shared requests";
+    BoolVec OutstandingReqs,   desc="Blocks that have outstanding private/shared requests";
+    MachineID Requestor,    desc="Requestor for three hop transactions";
+    bool DemandRequest, default="false", desc="Associated with a demand request";
+    Addr DemandAddress,  desc="Address for the demand request";
+    bool DoneAckReceived, default="false", desc="True if the done ack arrived before the message";
+    Addr DoneAckAddr,     desc="Address of the done ack received early";
+    int OutstandingThreshold, desc="Number of outstanding requests to trigger AllOutstanding on";
+
+    ProbeRequestType NewMsgType, desc="Type of message to send while 'evicting' ";
+    MachineID NewRequestor,    desc="Requestor for three hop transactions";
+    bool NewDemandRequest, default="false", desc="Associated with a demand request";
+    Addr NewDemandAddress,  desc="Address for the demand request";
+    bool dirty, desc="dirty";
+    bool AllOutstandingTriggered, default="false", desc="bit for only one all outstanding";
+    int OutstandingAcks, default="0", desc="number of acks to wait for";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  // Stores only region addresses
+  TBETable TBEs, template="<RegionBuffer_TBE>", constructor="m_number_of_TBEs";
+  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  int blockBits,  default="RubySystem::getBlockSizeBits()";
+  int blockBytes, default="RubySystem::getBlockSizeBytes()";
+  int regionBits, default="log2(m_blocksPerRegion)";
+
+  // Functions
+
+  int getRegionOffset(Addr addr) {
+    if (blocksPerRegion > 1) {
+      Addr offset := bitSelect(addr, blockBits, regionBits+blockBits-1);
+      int ret := addressToInt(offset);
+      assert(ret < blocksPerRegion);
+      return ret;
+    } else {
+      return 0;
+    }
+  }
+
+  Addr getRegionBase(Addr addr) {
+    return maskLowOrderBits(addr, blockBits+regionBits);
+  }
+
+  Addr getNextBlock(Addr addr) {
+    Addr a := addr;
+    return makeNextStrideAddress(a, 1);
+  }
+
+  MachineID getPeer(MachineID mach, Addr address) {
+    if (isOnCPU) {
+      return createMachineID(MachineType:CorePair, intToID(0));
+    } else if (noTCCdir) {
+      return mapAddressToRange(address,MachineType:TCC,
+                                  TCC_select_low_bit, TCC_select_num_bits);
+    } else {
+      return createMachineID(MachineType:TCCdir, intToID(0));
+    }
+  }
+
+  bool isOutstanding(TBE tbe, Entry cache_entry, Addr addr) {
+      if (is_valid(tbe) && tbe.OutstandingReqs.size() > 0) {
+          DPRINTF(RubySlicc, " outstanding tbe reqs %s %s %d %d\n",
+                  tbe.OutstandingReqs, addr, getRegionOffset(addr),
+                  tbe.OutstandingReqs.at(getRegionOffset(addr)));
+          return tbe.OutstandingReqs.at(getRegionOffset(addr));
+      } else if (is_valid(cache_entry)) {
+          DPRINTF(RubySlicc, " outstanding cache reqs %s %s %d %d\n",
+                  cache_entry.OutstandingReqs, addr, getRegionOffset(addr),
+                  cache_entry.OutstandingReqs.at(getRegionOffset(addr)));
+          return cache_entry.OutstandingReqs.at(getRegionOffset(addr));
+      } else {
+          return false;
+      }
+  }
+
+  bool isOnGPU() {
+    if (isOnCPU) {
+      return false;
+    }
+    return true;
+  }
+
+  bool isRead(CoherenceRequestType type) {
+    return (type == CoherenceRequestType:RdBlk || type == CoherenceRequestType:RdBlkS ||
+            type == CoherenceRequestType:VicClean);
+  }
+
+  bool presentOrAvail(Addr addr) {
+    return cacheMemory.isTagPresent(getRegionBase(addr)) || cacheMemory.cacheAvail(getRegionBase(addr));
+  }
+
+  // Returns a region entry!
+  Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", cacheMemory.lookup(getRegionBase(addr)));
+  }
+
+  TBE getTBE(Addr addr), return_by_pointer="yes" {
+    return TBEs.lookup(getRegionBase(addr));
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    return getCacheEntry(getRegionBase(addr)).DataBlk;
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.RegionState;
+    }
+    return State:NP;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+        tbe.TBEState := state;
+    }
+    if (is_valid(cache_entry)) {
+        cache_entry.RegionState := state;
+    }
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := getTBE(addr);
+    if(is_valid(tbe)) {
+      return RegionBuffer_State_to_permission(tbe.TBEState);
+    }
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return RegionBuffer_State_to_permission(cache_entry.RegionState);
+    }
+    return AccessPermission:NotPresent;
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    functionalMemoryRead(pkt);
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    if (functionalMemoryWrite(pkt)) {
+      return 1;
+    } else {
+      return 0;
+    }
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(RegionBuffer_State_to_permission(state));
+    }
+  }
+
+  void recordRequestType(RequestType stat, Addr addr) {
+    if (stat == RequestType:TagArrayRead) {
+        cacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (stat == RequestType:TagArrayWrite) {
+        cacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:TagArrayRead) {
+      return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+
+  // Overloaded outgoing request nework for both probes to cores and reqeusts
+  // to the directory.
+  // Fix Me: These forwarded requests need to be on a separate virtual channel
+  // to avoid deadlock!
+  out_port(requestNetwork_out, CPURequestMsg, requestToNetwork);
+  out_port(probeNetwork_out, NBProbeRequestMsg, requestToNetwork);
+
+  out_port(responseNetwork_out, ResponseMsg, responseToRegDir);
+
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=4) {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := getTBE(in_msg.addr);
+        DPRINTF(RubySlicc, "trigger msg: %s (%s)\n", in_msg, getRegionBase(in_msg.addr));
+        assert(is_valid(tbe));
+        if (in_msg.Type == TriggerType:AcksComplete) {
+            if (tbe.SendAck) {
+                trigger(Event:LastAck_PrbResp, in_msg.addr, cache_entry, tbe);
+            } else {
+                trigger(Event:LastAck_CleanWb, in_msg.addr, cache_entry, tbe);
+            }
+        } else if (in_msg.Type == TriggerType:AllOutstanding) {
+          trigger(Event:AllOutstanding, in_msg.addr, cache_entry, tbe);
+        } else {
+          assert(in_msg.Type == TriggerType:InvNext);
+          trigger(Event:Evict, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+  in_port(unblockNetwork_in, UnblockMsg, unblockFromDir, rank=3) {
+    if (unblockNetwork_in.isReady(clockEdge())) {
+      peek(unblockNetwork_in, UnblockMsg) {
+        TBE tbe := getTBE(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.DoneAck) {
+          if (isOutstanding(tbe, cache_entry, in_msg.addr)) {
+            trigger(Event:DoneAck, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:StallDoneAck, in_msg.addr, cache_entry, tbe);
+          }
+        } else {
+          assert(is_valid(tbe));
+          trigger(Event:InvAck, in_msg.addr, cache_entry, tbe);
+        }
+      }
+    }
+  }
+
+  in_port(probeNetwork_in, NBProbeRequestMsg, probeFromRegionDir, rank=2) {
+    if (probeNetwork_in.isReady(clockEdge())) {
+      peek(probeNetwork_in, NBProbeRequestMsg) {
+        TBE tbe := getTBE(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        assert(getRegionBase(in_msg.addr) == in_msg.addr);
+        if (in_msg.Type == ProbeRequestType:PrbInv) {
+          trigger(Event:InvRegion, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
+          trigger(Event:DowngradeRegion, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unknown probe message\n");
+        }
+      }
+    }
+  }
+
+  in_port(notifyNetwork_in, CPURequestMsg, notifyFromRegionDir, rank=1) {
+    if (notifyNetwork_in.isReady(clockEdge())) {
+      peek(notifyNetwork_in, CPURequestMsg) {
+        TBE tbe := getTBE(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        //Fix Me...add back in: assert(is_valid(cache_entry));
+        if (in_msg.Type == CoherenceRequestType:WbNotify) {
+          trigger(Event:WbNotify, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:SharedNotify) {
+          trigger(Event:SharedNotify, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:PrivateNotify) {
+          trigger(Event:PrivateNotify, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unknown notify message\n");
+        }
+      }
+    }
+  }
+
+  // In from cores
+  // NOTE: We get the cache / TBE entry based on the region address,
+  //       but pass the block address to the actions
+  in_port(requestNetwork_in, CPURequestMsg, requestFromCore, rank=0) {
+    if (requestNetwork_in.isReady(clockEdge())) {
+      peek(requestNetwork_in, CPURequestMsg) {
+        TBE tbe := getTBE(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (is_valid(tbe) && tbe.DoneAckReceived && tbe.DoneAckAddr == in_msg.addr) {
+            DPRINTF(RubySlicc, "Stale/Stall request %s\n", in_msg.Type);
+          if (in_msg.Type == CoherenceRequestType:VicDirty || in_msg.Type == CoherenceRequestType:VicClean )
+          {
+            trigger(Event:StaleRequest, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:StallAccess, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (isOutstanding(tbe, cache_entry, in_msg.addr)) {
+          DPRINTF(RubySlicc, "Stall outstanding request %s\n", in_msg.Type);
+          trigger(Event:StallAccess, in_msg.addr, cache_entry, tbe);
+        } else {
+        if (presentOrAvail(in_msg.addr)) {
+          if (in_msg.Type == CoherenceRequestType:RdBlkM ) {
+            trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:WriteThrough ) {
+            trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+          } else if (in_msg.Type == CoherenceRequestType:Atomic ) {
+            trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
+          } else {
+              if (in_msg.Type == CoherenceRequestType:VicDirty ||
+                  in_msg.Type == CoherenceRequestType:VicClean) {
+                  trigger(Event:CPUWriteback, in_msg.addr, cache_entry, tbe);
+              } else {
+                  trigger(Event:CPURead, in_msg.addr, cache_entry, tbe);
+              }
+          }
+        } else {
+          Addr victim := cacheMemory.cacheProbe(getRegionBase(in_msg.addr));
+          TBE victim_tbe := getTBE(victim);
+          Entry victim_entry := getCacheEntry(victim);
+          DPRINTF(RubySlicc, "Replacing region %s for %s(%s)\n", victim, in_msg.addr, getRegionBase(in_msg.addr));
+          trigger(Event:ReplRegion, victim, victim_entry, victim_tbe);
+        }
+        }
+      }
+    }
+  }
+
+  // Actions
+  action(f_fwdReqToDir, "f", desc="Forward CPU request to directory") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) {
+        out_msg.addr := in_msg.addr;
+        out_msg.Type := in_msg.Type;
+        out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.Dirty := in_msg.Dirty;
+        out_msg.Requestor := in_msg.Requestor;
+        out_msg.WTRequestor := in_msg.WTRequestor;
+        out_msg.Destination.add(map_Address_to_Directory(in_msg.addr));
+        out_msg.Shared := in_msg.Shared;
+        out_msg.MessageSize := in_msg.MessageSize;
+        out_msg.Private := true;
+        out_msg.InitialRequestTime := curCycle();
+        out_msg.ProbeRequestStartTime := curCycle();
+        if (getState(tbe, cache_entry, address) == State:S) {
+          out_msg.ForceShared := true;
+        }
+        DPRINTF(RubySlicc, "Fwd: %s\n", out_msg);
+        //assert(getState(tbe, cache_entry, address) == State:P || getState(tbe, cache_entry, address) == State:S);
+        if (getState(tbe, cache_entry, address) == State:NP_W) {
+          APPEND_TRANSITION_COMMENT(" fwding stale request: ");
+          APPEND_TRANSITION_COMMENT(out_msg.Type);
+        }
+      }
+    }
+  }
+
+  action(u_updateRegionEntry, "u", desc="Update the entry for profiling") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      if (is_valid(cache_entry)) {
+        if (in_msg.CtoDSinked == false) {
+          APPEND_TRANSITION_COMMENT(" incr outstanding ");
+          cache_entry.NumOutstandingReqs := 1 + cache_entry.NumOutstandingReqs;
+          assert(cache_entry.OutstandingReqs.at(getRegionOffset(address)) == false);
+          cache_entry.OutstandingReqs.at(getRegionOffset(address)) := true;
+          assert(cache_entry.NumOutstandingReqs == countBoolVec(cache_entry.OutstandingReqs));
+        } else {
+          APPEND_TRANSITION_COMMENT(" NOT incr outstanding ");
+          assert(in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:RdBlkS);
+        }
+        APPEND_TRANSITION_COMMENT(cache_entry.NumOutstandingReqs);
+        if (in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:Atomic ||
+            in_msg.Type == CoherenceRequestType:WriteThrough )
+        {
+          cache_entry.dirty := true;
+        }
+        if (in_msg.Type == CoherenceRequestType:VicDirty ||
+            in_msg.Type == CoherenceRequestType:VicClean) {
+            DPRINTF(RubySlicc, "Got %s for addr %s\n", in_msg.Type, address);
+            //assert(cache_entry.ValidBlocks.at(getRegionOffset(address)));
+            // can in fact be inv if core got an inv after a vicclean before it got here
+            if (cache_entry.ValidBlocks.at(getRegionOffset(address))) {
+                cache_entry.clearOnDone := true;
+                cache_entry.clearOnDoneAddr := address;
+                //cache_entry.ValidBlocks.at(getRegionOffset(address)) := false;
+                //cache_entry.NumValidBlocks := cache_entry.NumValidBlocks - 1;
+            }
+        } else {
+            if (cache_entry.ValidBlocks.at(getRegionOffset(address)) == false) {
+              cache_entry.NumValidBlocks := cache_entry.NumValidBlocks + 1;
+            }
+            DPRINTF(RubySlicc, "before valid addr %s bits %s\n",
+                    in_msg.Type, address, cache_entry.ValidBlocks);
+            cache_entry.ValidBlocks.at(getRegionOffset(address)) := true;
+            DPRINTF(RubySlicc, "after valid addr %s bits %s\n",
+                    in_msg.Type, address, cache_entry.ValidBlocks);
+            cache_entry.UsedBlocks.at(getRegionOffset(address)) := true;
+        }
+        assert(cache_entry.NumValidBlocks <= blocksPerRegion);
+        assert(cache_entry.NumValidBlocks >= 0);
+        APPEND_TRANSITION_COMMENT(" valid blocks ");
+        APPEND_TRANSITION_COMMENT(cache_entry.ValidBlocks);
+      } else {
+        error("This shouldn't happen anymore I think");
+        //tbe.ValidBlocks.at(getRegionOffest(address)) := true;
+        assert(getState(tbe, cache_entry, address) == State:P_NP);
+      }
+    }
+  }
+
+  action(uw_updatePossibleWriteback, "uw", desc="writeback request complete") {
+      peek(unblockNetwork_in, UnblockMsg) {
+          if (is_valid(cache_entry) && in_msg.validToInvalid &&
+              cache_entry.clearOnDone && cache_entry.clearOnDoneAddr == address) {
+              DPRINTF(RubySlicc, "I have no idea what is going on here\n");
+              cache_entry.ValidBlocks.at(getRegionOffset(address)) := false;
+              cache_entry.NumValidBlocks := cache_entry.NumValidBlocks - 1;
+              cache_entry.clearOnDone := false;
+          }
+      }
+  }
+
+
+  action(rp_requestPrivate, "rp", desc="Send private request r-dir") {
+      peek(requestNetwork_in, CPURequestMsg) {
+          // No need to send acks on replacements
+          assert(is_invalid(tbe));
+          enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) {
+              out_msg.addr := address; // use the actual address so the demand request can be fulfilled
+              out_msg.DemandAddress := address;
+              out_msg.Type := CoherenceRequestType:PrivateRequest;
+              out_msg.OriginalType := in_msg.Type;
+              out_msg.Requestor := machineID;
+              out_msg.WTRequestor := in_msg.WTRequestor;
+              out_msg.InitialRequestTime := curCycle();
+              // will this always be ok? probably not for multisocket
+              out_msg.Destination.add(map_Address_to_RegionDir(address));
+              out_msg.MessageSize := MessageSizeType:Request_Control;
+              DPRINTF(RubySlicc, "Private request %s\n", out_msg);
+          }
+          cache_entry.ProbeRequestTime := curCycle();
+          cache_entry.MsgSentToDir := true;
+          APPEND_TRANSITION_COMMENT(getRegionBase(address));
+      }
+  }
+
+  action(ru_requestUpgrade, "ru", desc="Send upgrade request r-dir") {
+      peek(requestNetwork_in, CPURequestMsg) {
+          // No need to send acks on replacements
+          assert(is_invalid(tbe));
+          enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) {
+              out_msg.addr := address; // use the actual address so the demand request can be fulfilled
+              out_msg.Type := CoherenceRequestType:UpgradeRequest;
+              out_msg.OriginalType := in_msg.Type;
+              out_msg.Requestor := machineID;
+              out_msg.WTRequestor := in_msg.WTRequestor;
+              out_msg.InitialRequestTime := curCycle();
+              // will this always be ok? probably not for multisocket
+              out_msg.Destination.add(map_Address_to_RegionDir(address));
+              out_msg.MessageSize := MessageSizeType:Request_Control;
+          }
+          cache_entry.ProbeRequestTime := curCycle();
+          cache_entry.MsgSentToDir := true;
+          APPEND_TRANSITION_COMMENT(getRegionBase(address));
+      }
+  }
+
+  action(rw_requestWriteback, "rq", desc="Send writeback request") {
+    // No need to send acks on replacements
+    enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) {
+        out_msg.addr := getRegionBase(address); // use the actual address so the demand request can be fulfilled
+        out_msg.Type := CoherenceRequestType:CleanWbRequest;
+        out_msg.Requestor := machineID;
+        out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+        out_msg.MessageSize := MessageSizeType:Request_Control;
+        out_msg.Dirty := tbe.dirty;
+          APPEND_TRANSITION_COMMENT(getRegionBase(address));
+    }
+  }
+
+  action(rs_requestShared, "rs", desc="Send shared request r-dir") {
+      peek(requestNetwork_in, CPURequestMsg) {
+          // No need to send acks on replacements
+          assert(is_invalid(tbe));
+          enqueue(requestNetwork_out, CPURequestMsg, toRegionDirLatency) {
+              out_msg.addr := address; // use the actual address so the demand request can be fulfilled
+              out_msg.Type := CoherenceRequestType:SharedRequest;
+              out_msg.OriginalType := in_msg.Type;
+              out_msg.Requestor := machineID;
+              out_msg.WTRequestor := in_msg.WTRequestor;
+              out_msg.InitialRequestTime := curCycle();
+              // will this always be ok? probably not for multisocket
+              out_msg.Destination.add(map_Address_to_RegionDir(address));
+              out_msg.MessageSize := MessageSizeType:Request_Control;
+          }
+          cache_entry.ProbeRequestTime := curCycle();
+          cache_entry.MsgSentToDir := true;
+          APPEND_TRANSITION_COMMENT(getRegionBase(address));
+      }
+  }
+
+  action(ai_ackRegionInv, "ai", desc="Send ack to r-dir on region inv if tbe says so") {
+    // No need to send acks on replacements
+    assert(is_valid(tbe));
+    enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+        out_msg.addr := getRegionBase(address);
+        out_msg.Type := CoherenceResponseType:CPUPrbResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(ad_ackDircetory, "ad", desc="send probe response to directory") {
+    if (noTCCdir && tbe.MsgType == ProbeRequestType:PrbDowngrade && isOnGPU()) { //VIPER tcc doesnt understand PrbShrData
+      assert(tbe.DemandRequest);                                    //So, let RegionBuffer take care of sending back ack
+      enqueue(responseNetwork_out, ResponseMsg, toDirLatency) {
+          out_msg.addr := tbe.DemandAddress;
+          out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+          out_msg.Sender := getPeer(machineID,address);
+          out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+          out_msg.Dirty := false;  // only true if sending back data i think
+          out_msg.Hit := false;
+          out_msg.Ntsl := false;
+          out_msg.State := CoherenceState:NA;
+          out_msg.NoAckNeeded := true;
+          out_msg.MessageSize := MessageSizeType:Response_Control;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+    }
+  }
+
+  action(aie_ackRegionExclusiveInv, "aie", desc="Send ack to r-dir on region inv if tbe says so") {
+    // No need to send acks on replacements
+    assert(is_valid(tbe));
+    enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+        out_msg.addr := getRegionBase(address);
+        out_msg.Type := CoherenceResponseType:CPUPrbResp;
+        out_msg.Sender := machineID;
+        out_msg.NotCached := true;
+        out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        out_msg.Dirty := tbe.dirty;
+    }
+  }
+
+  action(ain_ackRegionInvNow, "ain", desc="Send ack to r-dir on region inv") {
+    enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+      out_msg.addr := getRegionBase(address);
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(aine_ackRegionInvExlusiveNow, "aine", desc="Send ack to r-dir on region inv with exlusive permission") {
+    enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+      out_msg.addr := getRegionBase(address);
+      out_msg.Type := CoherenceResponseType:CPUPrbResp;
+      out_msg.Sender := machineID;
+      out_msg.NotCached := true;
+      out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(ap_ackPrivateNotify, "ap", desc="Send ack to r-dir on private notify") {
+    enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+      out_msg.addr := getRegionBase(address);
+      out_msg.Type := CoherenceResponseType:PrivateAck;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+      out_msg.MessageSize := MessageSizeType:Response_Control;
+    }
+  }
+
+  action(aw_ackWbNotify, "aw", desc="Send ack to r-dir on writeback notify") {
+    peek(notifyNetwork_in, CPURequestMsg) {
+      if (in_msg.NoAckNeeded == false) {
+        enqueue(responseNetwork_out, ResponseMsg, toRegionDirLatency) {
+          out_msg.addr := getRegionBase(address);
+          out_msg.Type := CoherenceResponseType:RegionWbAck;
+          out_msg.Sender := machineID;
+          out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket
+          out_msg.MessageSize := MessageSizeType:Response_Control;
+        }
+      }
+    }
+  }
+
+  action(e_evictCurrent, "e", desc="Evict this block in the region") {
+    // send force invalidate message to directory to invalidate this block
+    // must invalidate all blocks since region buffer could have privitized it
+      if (tbe.ValidBlocks.at(getRegionOffset(address)) &&
+          (tbe.DemandRequest == false || tbe.DemandAddress != address)) {
+          DPRINTF(RubySlicc, "trying to evict address %s (base: %s, offset: %d)\n", address, getRegionBase(address), getRegionOffset(address));
+          DPRINTF(RubySlicc, "tbe valid blocks %s\n", tbe.ValidBlocks);
+
+        enqueue(probeNetwork_out, NBProbeRequestMsg, 1) {
+            out_msg.addr := address;
+            out_msg.Type := tbe.MsgType;
+            out_msg.ReturnData := true;
+            if (address == tbe.DemandAddress) {
+                out_msg.DemandRequest := true;
+            }
+            out_msg.MessageSize := MessageSizeType:Control;
+            out_msg.Destination.add(getPeer(machineID,address));
+            DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+        APPEND_TRANSITION_COMMENT(" current ");
+        APPEND_TRANSITION_COMMENT(tbe.ValidBlocks.at(getRegionOffset(address)));
+        tbe.AllAcksReceived := false;
+      } else {
+          DPRINTF(RubySlicc, "Not evicting demand %s\n", address);
+      }
+  }
+
+  action(ed_evictDemand, "ed", desc="Evict the demand request if it's valid") {
+    if (noTCCdir && tbe.MsgType == ProbeRequestType:PrbDowngrade && isOnGPU()) {
+      tbe.OutstandingAcks := 0;
+      tbe.AllAcksReceived := true;
+      tbe.DoneEvicting := true;
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+          out_msg.Type := TriggerType:AcksComplete;
+          out_msg.addr := getRegionBase(address);
+      }
+    } else if (tbe.DemandRequest) {
+      enqueue(probeNetwork_out, NBProbeRequestMsg, 1) {
+        out_msg.addr := tbe.DemandAddress;
+        out_msg.Type := tbe.MsgType;
+        out_msg.ReturnData := true;
+        out_msg.DemandRequest := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination.add(getPeer(machineID,address));
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+        tbe.AllAcksReceived := false;
+      }
+      if (tbe.ValidBlocks.at(getRegionOffset(tbe.DemandAddress)) == false) {
+          tbe.OutstandingAcks := tbe.OutstandingAcks + 1;
+      }
+      APPEND_TRANSITION_COMMENT("Evicting demand ");
+      APPEND_TRANSITION_COMMENT(tbe.DemandAddress);
+    }
+    APPEND_TRANSITION_COMMENT("waiting acks ");
+    APPEND_TRANSITION_COMMENT(tbe.OutstandingAcks);
+  }
+
+  action(adp_AckDemandProbe, "fp", desc="forward demand probe even if we know that the core is invalid") {
+    peek(probeNetwork_in, NBProbeRequestMsg) {
+        if (in_msg.DemandRequest) {
+            enqueue(responseNetwork_out, ResponseMsg, toDirLatency) {
+                out_msg.addr := in_msg.DemandAddress;
+                out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
+                out_msg.Sender := getPeer(machineID,address);
+                out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket
+                out_msg.Dirty := false;  // only true if sending back data i think
+                out_msg.Hit := false;
+                out_msg.Ntsl := false;
+                out_msg.State := CoherenceState:NA;
+                out_msg.NoAckNeeded := true;
+                out_msg.MessageSize := MessageSizeType:Response_Control;
+                DPRINTF(RubySlicc, "%s\n", out_msg);
+            }
+        }
+    }
+  }
+
+  action(en_enqueueNextEvict, "en", desc="Queue evict the next block in the region") {
+    // increment in_msg.addr by blockSize bytes and enqueue on triggerPort
+    // Only enqueue if the next address doesn't overrun the region bound
+    if (getRegionBase(getNextBlock(address)) == getRegionBase(address)) {
+        enqueue(triggerQueue_out, TriggerMsg, nextEvictLatency) {
+            out_msg.Type := TriggerType:InvNext;
+            out_msg.addr := getNextBlock(address);
+        }
+    } else {
+        tbe.DoneEvicting := true;
+        DPRINTF(RubySlicc, "Done evicing region %s\n", getRegionBase(address));
+        DPRINTF(RubySlicc, "Waiting for %s acks\n", tbe.OutstandingAcks);
+        if (tbe.AllAcksReceived == true) {
+            enqueue(triggerQueue_out, TriggerMsg, 1) {
+                out_msg.Type := TriggerType:AcksComplete;
+                out_msg.addr := getRegionBase(address);
+            }
+        }
+    }
+  }
+
+  action(ef_enqueueFirstEvict, "ef", desc="Queue the first block in the region to be evicted") {
+    if (tbe.DoneEvicting == false) {
+      enqueue(triggerQueue_out, TriggerMsg, nextEvictLatency) {
+          out_msg.Type := TriggerType:InvNext;
+          out_msg.addr := getRegionBase(address);
+      }
+    }
+  }
+
+  action(ra_receiveAck, "ra", desc="Mark TBE entry as received this ack") {
+      DPRINTF(RubySlicc, "received ack for %s reg: %s vec: %s pos: %d\n",
+              address, getRegionBase(address), tbe.ValidBlocks, getRegionOffset(address));
+      peek(unblockNetwork_in, UnblockMsg) {
+          //
+          // Note the tbe ValidBlock vec will be a conservative list of the
+          // valid blocks since the cache entry ValidBlock vec is set on the
+          // request
+          //
+          if (in_msg.wasValid) {
+              assert(tbe.ValidBlocks.at(getRegionOffset(address)));
+          }
+      }
+      tbe.OutstandingAcks := tbe.OutstandingAcks - 1;
+      tbe.AcksReceived.at(getRegionOffset(address)) := true;
+      assert(tbe.OutstandingAcks >= 0);
+      if (tbe.OutstandingAcks == 0) {
+          tbe.AllAcksReceived := true;
+          if (tbe.DoneEvicting) {
+              enqueue(triggerQueue_out, TriggerMsg, 1) {
+                  out_msg.Type := TriggerType:AcksComplete;
+                  out_msg.addr := getRegionBase(address);
+              }
+          }
+      }
+
+      APPEND_TRANSITION_COMMENT(getRegionBase(address));
+      APPEND_TRANSITION_COMMENT(" Acks left receive ");
+      APPEND_TRANSITION_COMMENT(tbe.OutstandingAcks);
+  }
+
+  action(do_decrementOutstanding, "do", desc="Decrement outstanding requests") {
+    APPEND_TRANSITION_COMMENT(" decr outstanding ");
+    if (is_valid(cache_entry)) {
+      cache_entry.NumOutstandingReqs := cache_entry.NumOutstandingReqs - 1;
+      assert(cache_entry.OutstandingReqs.at(getRegionOffset(address)));
+      cache_entry.OutstandingReqs.at(getRegionOffset(address)) := false;
+      assert(cache_entry.NumOutstandingReqs >= 0);
+      assert(cache_entry.NumOutstandingReqs == countBoolVec(cache_entry.OutstandingReqs));
+      APPEND_TRANSITION_COMMENT(cache_entry.NumOutstandingReqs);
+    }
+    if (is_valid(tbe)) {
+      tbe.NumOutstandingReqs := tbe.NumOutstandingReqs - 1;
+      assert(tbe.OutstandingReqs.at(getRegionOffset(address)));
+      tbe.OutstandingReqs.at(getRegionOffset(address)) := false;
+      assert(tbe.NumOutstandingReqs >= 0);
+      assert(tbe.NumOutstandingReqs == countBoolVec(tbe.OutstandingReqs));
+      APPEND_TRANSITION_COMMENT(tbe.NumOutstandingReqs);
+    }
+  }
+
+  action(co_checkOutstanding, "co", desc="check if there are no more outstanding requests") {
+    assert(is_valid(tbe));
+    if ((tbe.NumOutstandingReqs <= tbe.OutstandingThreshold) &&
+        (tbe.AllOutstandingTriggered == false)) {
+      APPEND_TRANSITION_COMMENT(" no more outstanding: ");
+      APPEND_TRANSITION_COMMENT(tbe.NumOutstandingReqs);
+      APPEND_TRANSITION_COMMENT(tbe.OutstandingThreshold);
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+          out_msg.Type := TriggerType:AllOutstanding;
+          if (tbe.DemandRequest) {
+              out_msg.addr := tbe.DemandAddress;
+          } else {
+              out_msg.addr := getRegionBase(address);
+          }
+          DPRINTF(RubySlicc, "co enqueuing %s\n", out_msg);
+          tbe.AllOutstandingTriggered := true;
+      }
+    } else {
+      APPEND_TRANSITION_COMMENT(" still more outstanding ");
+    }
+  }
+
+  action(ro_resetAllOutstanding, "ro", desc="Reset all outstanding") {
+      tbe.AllOutstandingTriggered := false;
+  }
+
+  action(so_setOutstandingCheckOne, "so", desc="Check outstanding is waiting for 1, not 0") {
+    // Need this for S_P because one request is outstanding between here and r-dir
+    tbe.OutstandingThreshold := 1;
+  }
+
+  action(a_allocateRegionEntry, "a", desc="Allocate a new entry") {
+    set_cache_entry(cacheMemory.allocate(getRegionBase(address), new Entry));
+    cache_entry.ValidBlocks.clear();
+    cache_entry.ValidBlocks.resize(blocksPerRegion);
+    cache_entry.UsedBlocks.clear();
+    cache_entry.UsedBlocks.resize(blocksPerRegion);
+    cache_entry.dirty := false;
+    cache_entry.NumOutstandingReqs := 0;
+    cache_entry.OutstandingReqs.clear();
+    cache_entry.OutstandingReqs.resize(blocksPerRegion);
+  }
+
+  action(d_deallocateRegionEntry, "d", desc="Deallocate region entry") {
+    cacheMemory.deallocate(getRegionBase(address));
+    unset_cache_entry();
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    TBEs.allocate(getRegionBase(address));
+    set_tbe(getTBE(address));
+    tbe.OutstandingAcks := 0;
+    tbe.AllAcksReceived := true; // starts true since the region could be empty
+    tbe.DoneEvicting := false;
+    tbe.AcksReceived.clear();
+    tbe.AcksReceived.resize(blocksPerRegion);
+    tbe.SendAck := false;
+    tbe.OutstandingThreshold := 0;
+    if (is_valid(cache_entry)) {
+      tbe.NumOutstandingReqs := cache_entry.NumOutstandingReqs;
+      tbe.OutstandingReqs := cache_entry.OutstandingReqs;
+      assert(tbe.NumOutstandingReqs == countBoolVec(tbe.OutstandingReqs));
+      tbe.dirty := cache_entry.dirty;
+      tbe.ValidBlocks := cache_entry.ValidBlocks;
+      tbe.OutstandingAcks := countBoolVec(tbe.ValidBlocks);
+      APPEND_TRANSITION_COMMENT(" tbe valid blocks ");
+      APPEND_TRANSITION_COMMENT(tbe.ValidBlocks);
+      APPEND_TRANSITION_COMMENT(" cache valid blocks ");
+      APPEND_TRANSITION_COMMENT(cache_entry.ValidBlocks);
+    } else {
+      tbe.dirty := false;
+    }
+  }
+
+  action(m_markSendAck, "m", desc="Mark TBE that we need to ack at end") {
+    assert(is_valid(tbe));
+    tbe.SendAck := true;
+  }
+
+  action(db_markDirtyBit, "db", desc="Mark TBE dirty bit") {
+      peek(unblockNetwork_in, UnblockMsg) {
+          if (is_valid(tbe)) {
+              tbe.dirty := tbe.dirty || in_msg.Dirty;
+          }
+      }
+  }
+
+  action(dr_markDoneAckReceived, "dr", desc="Mark TBE that a done ack has been received") {
+    assert(is_valid(tbe));
+    tbe.DoneAckReceived := true;
+    tbe.DoneAckAddr := address;
+    APPEND_TRANSITION_COMMENT(" marking done ack on TBE ");
+  }
+
+  action(se_setTBE, "se", desc="Set msg type to evict") {
+    peek(probeNetwork_in, NBProbeRequestMsg) {
+        tbe.MsgType := in_msg.Type;
+        tbe.Requestor := in_msg.Requestor;
+        tbe.DemandAddress := in_msg.DemandAddress;
+        tbe.DemandRequest := in_msg.DemandRequest;
+    }
+  }
+
+  action(sne_setNewTBE, "sne", desc="Set msg type to evict") {
+    peek(probeNetwork_in, NBProbeRequestMsg) {
+        tbe.NewMsgType := in_msg.Type;
+        tbe.NewRequestor := in_msg.Requestor;
+        tbe.NewDemandAddress := in_msg.DemandAddress;
+        tbe.NewDemandRequest := in_msg.DemandRequest;
+    }
+  }
+
+  action(soe_setOldTBE, "soe", desc="Set msg type to evict") {
+    tbe.MsgType := tbe.NewMsgType;
+    tbe.Requestor := tbe.NewRequestor;
+    tbe.DemandAddress := tbe.NewDemandAddress;
+    tbe.DemandRequest := tbe.NewDemandRequest;
+    tbe.OutstandingAcks :=  countBoolVec(tbe.ValidBlocks);
+    tbe.AllAcksReceived := true; // starts true since the region could be empty
+    tbe.DoneEvicting := false;
+    tbe.AcksReceived.clear();
+    tbe.AcksReceived.resize(blocksPerRegion);
+    tbe.SendAck := false;
+  }
+
+  action(ser_setTBE, "ser", desc="Set msg type to evict repl") {
+    tbe.MsgType := ProbeRequestType:PrbInv;
+  }
+
+  action(md_setMustDowngrade, "md", desc="When permissions finally get here, must be shared") {
+    assert(is_valid(cache_entry));
+    cache_entry.MustDowngrade := true;
+  }
+
+  action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+    TBEs.deallocate(getRegionBase(address));
+    unset_tbe();
+  }
+
+  action(p_popRequestQueue, "p", desc="Pop the request queue") {
+    requestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pl_popUnblockQueue, "pl", desc="Pop the unblock queue") {
+    unblockNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pn_popNotifyQueue, "pn", desc="Pop the notify queue") {
+    notifyNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pp_popProbeQueue, "pp", desc="Pop the probe queue") {
+    probeNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pt_popTriggerQueue, "pt", desc="Pop the trigger queue") {
+    DPRINTF(RubySlicc, "Trigger Before Contents: %s\n", triggerQueue_in);
+    triggerQueue_in.dequeue(clockEdge());
+    DPRINTF(RubySlicc, "Trigger After Contents: %s\n", triggerQueue_in);
+  }
+
+  // Must always use wake all, since non-region address wait on region addresses
+  action(wa_wakeUpAllDependents, "wa", desc="Wake up any requests waiting for this region") {
+    wakeUpAllBuffers();
+  }
+
+  action(zz_stallAndWaitRequestQueue, "\z", desc="recycle request queue") {
+    Addr regAddr := getRegionBase(address);
+    DPRINTF(RubySlicc, "Stalling address %s\n", regAddr);
+    stall_and_wait(requestNetwork_in, regAddr);
+  }
+
+  action(yy_stallAndWaitProbeQueue, "\y", desc="stall probe queue") {
+    Addr regAddr := getRegionBase(address);
+    stall_and_wait(probeNetwork_in, regAddr);
+  }
+
+  action(yyy_recycleProbeQueue, "\yy", desc="recycle probe queue") {
+    probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(zzz_recycleRequestQueue, "\zz", desc="recycle request queue") {
+    requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(www_recycleUnblockNetwork, "\ww", desc="recycle unblock queue") {
+    unblockNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(z_stall, "z", desc="stall request queue") {
+    // fake state
+  }
+
+  action(mru_setMRU, "mru", desc="set MRU") {
+    cacheMemory.setMRU(address, cache_entry.NumValidBlocks);
+  }
+
+  // Transitions
+
+  transition({NP_PS, S_P, S_NP_PS, P_NP, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, P_NP_W, P_NP_NP, NP_W}, {CPURead, CPUWriteback, CPUWrite}) {} {
+    zz_stallAndWaitRequestQueue;
+  }
+
+  transition(SS_P, {CPURead, CPUWriteback}) {
+    zz_stallAndWaitRequestQueue;
+  }
+
+  transition({NP, S, P, NP_PS, S_P, S_NP_PS, P_NP, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, SS_P, NP_W, P_NP_NP}, StallAccess) {} {
+    zz_stallAndWaitRequestQueue;
+  }
+
+  transition({S, P, NP_PS, S_P, S_NP_PS, P_NP, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, SS_P, P_NP_W, P_NP_NP, NP_W}, StallDoneAck) {
+    www_recycleUnblockNetwork;
+  }
+
+  transition(NP, StallDoneAck, NP_W) {
+    t_allocateTBE;
+    db_markDirtyBit;
+    dr_markDoneAckReceived;
+    pl_popUnblockQueue;
+  }
+
+  transition(NP_W, StaleRequest, NP) {
+    f_fwdReqToDir;
+    dt_deallocateTBE;
+    wa_wakeUpAllDependents;
+    p_popRequestQueue;
+  }
+
+  transition(P_NP_O, DowngradeRegion) {} {
+    z_stall; // should stall and wait
+  }
+
+  transition({NP_PS, S_NP_PS, S_P, P_S, P_NP_O, S_NP_PS_O, P_S_O, S_O, SS_P}, ReplRegion) {} {
+    zz_stallAndWaitRequestQueue; // can't let things get out of order!
+  }
+
+  transition({P_NP_O, S_O, SS_P}, InvRegion) {} {
+    yyy_recycleProbeQueue; // can't be z_stall because there could be a RdBlkM in the requestQueue which has the sinked flag which is blocking the inv
+  }
+
+  transition(P_NP, {InvRegion, DowngradeRegion}, P_NP_NP) {} {
+    sne_setNewTBE;
+    pp_popProbeQueue;
+  }
+
+  transition(S_P, DowngradeRegion) {} {
+    adp_AckDemandProbe;
+    ain_ackRegionInvNow;
+    pp_popProbeQueue;
+  }
+
+  transition(P_NP_W, InvRegion) {
+    adp_AckDemandProbe;
+    ain_ackRegionInvNow;
+    pp_popProbeQueue;
+  }
+
+  transition(P_NP_W, DowngradeRegion) {
+    adp_AckDemandProbe;
+    aine_ackRegionInvExlusiveNow;
+    pp_popProbeQueue;
+  }
+
+  transition({P, S}, {CPURead, CPUWriteback}) {TagArrayRead, TagArrayWrite} {
+    mru_setMRU;
+    f_fwdReqToDir;
+    u_updateRegionEntry;
+    p_popRequestQueue;
+  }
+
+  transition(P, CPUWrite) {TagArrayRead, TagArrayWrite} {
+    mru_setMRU;
+    f_fwdReqToDir;
+    u_updateRegionEntry;
+    p_popRequestQueue;
+  }
+
+  transition(S, CPUWrite, S_O) {TagArrayRead} {
+    mru_setMRU;
+    t_allocateTBE;
+    co_checkOutstanding;
+    zz_stallAndWaitRequestQueue;
+  }
+
+  transition(S_O, AllOutstanding, SS_P) {
+    wa_wakeUpAllDependents;
+    ro_resetAllOutstanding;
+    pt_popTriggerQueue;
+  }
+
+  transition(SS_P, CPUWrite, S_P) {
+    mru_setMRU;
+    dt_deallocateTBE;
+    ru_requestUpgrade;
+    u_updateRegionEntry;
+    p_popRequestQueue;
+  }
+
+  transition(NP, {CPURead, CPUWriteback}, NP_PS) {TagArrayRead, TagArrayWrite} {
+    a_allocateRegionEntry;
+    rs_requestShared;
+    u_updateRegionEntry;
+    p_popRequestQueue;//zz_stallAndWaitRequestQueue;
+  }
+
+  transition(NP, CPUWrite, NP_PS) {TagArrayRead, TagArrayWrite} {
+    a_allocateRegionEntry;
+    rp_requestPrivate;
+    u_updateRegionEntry;
+    p_popRequestQueue;//zz_stallAndWaitRequestQueue;
+  }
+
+  transition(NP_PS, PrivateNotify, P) {} {
+    ap_ackPrivateNotify;
+    wa_wakeUpAllDependents;
+    pn_popNotifyQueue;
+  }
+
+  transition(S_P, PrivateNotify, P) {} {
+    ap_ackPrivateNotify;
+    wa_wakeUpAllDependents;
+    pn_popNotifyQueue;
+  }
+
+  transition(NP_PS, SharedNotify, S) {} {
+    ap_ackPrivateNotify;
+    wa_wakeUpAllDependents;
+    pn_popNotifyQueue;
+  }
+
+  transition(P_NP_W, WbNotify, NP) {} {
+    aw_ackWbNotify;
+    wa_wakeUpAllDependents;
+    dt_deallocateTBE;
+    pn_popNotifyQueue;
+  }
+
+  transition({P, S}, ReplRegion, P_NP_O) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    ser_setTBE;
+    d_deallocateRegionEntry;
+    co_checkOutstanding;
+  }
+
+  transition({P, S}, InvRegion, P_NP_O) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    se_setTBE;
+    m_markSendAck;
+    d_deallocateRegionEntry;
+    co_checkOutstanding;
+    pp_popProbeQueue;
+  }
+
+  transition(P_NP_O, AllOutstanding, P_NP) {} {
+    ed_evictDemand;
+    ef_enqueueFirstEvict;
+    ro_resetAllOutstanding;
+    pt_popTriggerQueue;
+  }
+
+  transition(S_P, InvRegion, S_NP_PS_O) {TagArrayRead} {
+    t_allocateTBE;
+    se_setTBE;
+    m_markSendAck;
+    so_setOutstandingCheckOne;
+    co_checkOutstanding;
+    pp_popProbeQueue;
+  }
+
+  transition(S_NP_PS_O, AllOutstanding, S_NP_PS) {
+    ed_evictDemand;
+    ef_enqueueFirstEvict;
+    ro_resetAllOutstanding;
+    pt_popTriggerQueue;
+  }
+
+  transition(P, DowngradeRegion, P_S_O) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    se_setTBE;
+    m_markSendAck;
+    co_checkOutstanding;
+    pp_popProbeQueue;
+  }
+
+  transition(P_S_O, AllOutstanding, P_S) {} {
+    ed_evictDemand;
+    ef_enqueueFirstEvict;
+    ro_resetAllOutstanding;
+    pt_popTriggerQueue;
+  }
+
+  transition({P, S}, DoneAck) {TagArrayWrite} {
+    do_decrementOutstanding;
+    wa_wakeUpAllDependents;
+    db_markDirtyBit;
+    uw_updatePossibleWriteback;
+    pl_popUnblockQueue;
+  }
+
+  transition({S_P, NP_PS, S_NP_PS}, DoneAck) {TagArrayWrite} {
+      www_recycleUnblockNetwork;
+  }
+
+  transition({P_NP_O, S_NP_PS_O, P_S_O, S_O}, DoneAck) {} {
+    do_decrementOutstanding;
+    co_checkOutstanding;
+    db_markDirtyBit;
+    uw_updatePossibleWriteback;
+    pl_popUnblockQueue;
+  }
+
+  transition({P_NP, P_S, S_NP_PS, P_NP_NP}, Evict) {} {
+    e_evictCurrent;
+    en_enqueueNextEvict;
+    pt_popTriggerQueue;
+  }
+
+  transition({P_NP, P_S, S_NP_PS, P_NP_NP}, InvAck) {} {
+    ra_receiveAck;
+    db_markDirtyBit;
+    pl_popUnblockQueue;
+  }
+
+  transition(P_NP, LastAck_CleanWb, P_NP_W) {} {
+      rw_requestWriteback;
+      pt_popTriggerQueue;
+  }
+
+  transition(P_NP_NP, LastAck_CleanWb, P_NP) {} {
+    soe_setOldTBE;
+    m_markSendAck;
+    ed_evictDemand;
+    ef_enqueueFirstEvict;
+    pt_popTriggerQueue;
+  }
+
+  transition(P_NP, LastAck_PrbResp, NP) {} {
+    aie_ackRegionExclusiveInv;
+    dt_deallocateTBE;
+    wa_wakeUpAllDependents;
+    pt_popTriggerQueue;
+  }
+
+  transition(S_NP_PS, LastAck_PrbResp, NP_PS) {} {
+    aie_ackRegionExclusiveInv;
+    dt_deallocateTBE;
+    wa_wakeUpAllDependents;
+    pt_popTriggerQueue;
+  }
+
+  transition(P_S, LastAck_PrbResp, S) {} {
+    ai_ackRegionInv;
+    ad_ackDircetory;
+    dt_deallocateTBE;
+    wa_wakeUpAllDependents;
+    pt_popTriggerQueue;
+  }
+
+}
+
diff --git a/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm b/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm
new file mode 100644
index 000000000..b392311c5
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm
@@ -0,0 +1,1187 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Jason Power
+ */
+
+machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol")
+: CacheMemory *cacheMemory; // stores only region addresses. Must set block size same as below
+  NodeID cpuRegionBufferNum;
+  NodeID gpuRegionBufferNum;
+  int blocksPerRegion := 64; // 4k regions
+  Cycles toDirLatency := 10;    // Latency to fwd requests and send invs to directory
+  bool always_migrate := "False";
+  bool sym_migrate := "False";
+  bool asym_migrate := "False";
+  bool noTCCdir := "False";
+  int TCC_select_num_bits := 1;
+
+  // To the directory
+  MessageBuffer * requestToDir, network="To", virtual_network="5", vnet_type="request";
+
+  // To the region buffers
+  MessageBuffer * notifyToRBuffer, network="To", virtual_network="7", vnet_type="request";
+  MessageBuffer * probeToRBuffer, network="To", virtual_network="8", vnet_type="request";
+
+  // From the region buffers
+  MessageBuffer * responseFromRBuffer, network="From", virtual_network="2", vnet_type="response";
+  MessageBuffer * requestFromRegBuf, network="From", virtual_network="0", vnet_type="request";
+
+  MessageBuffer * triggerQueue;
+{
+
+  // States
+  state_declaration(State, desc="Region states", default="RegionDir_State_NP") {
+    NP, AccessPermission:Invalid,       desc="Not present in region directory";
+    P,  AccessPermission:Invalid,       desc="Region is private to owner";
+    S,  AccessPermission:Invalid,       desc="Region is shared between CPU and GPU";
+
+    P_NP,  AccessPermission:Invalid,    desc="Evicting the region";
+    NP_P,  AccessPermission:Invalid,    desc="Must wait for ack from R-buf";
+    NP_S,  AccessPermission:Invalid,    desc="Must wait for ack from R-buf";
+    P_P,   AccessPermission:Invalid,    desc="Waiting for ack from R-buf";
+    S_S,   AccessPermission:Invalid,    desc="Waiting for ack from R-buf";
+    P_S,   AccessPermission:Invalid,    desc="Downgrading the region";
+    S_P,   AccessPermission:Invalid,    desc="Upgrading the region";
+    P_AS,  AccessPermission:Invalid,    desc="Sent invalidates, waiting for acks";
+    S_AP,  AccessPermission:Invalid,    desc="Sent invalidates, waiting for acks";
+    P_AP,  AccessPermission:Invalid,    desc="Sent invalidates, waiting for acks";
+
+    SP_NP_W, AccessPermission:Invalid,   desc="Last sharer writing back, waiting for ack";
+    S_W,   AccessPermission:Invalid,   desc="Sharer writing back, waiting for ack";
+
+    P_AP_W, AccessPermission:Invalid,   desc="Fwded request to dir, waiting for ack";
+    P_AS_W, AccessPermission:Invalid,   desc="Fwded request to dir, waiting for ack";
+    S_AP_W, AccessPermission:Invalid,   desc="Fwded request to dir, waiting for ack";
+  }
+
+  enumeration(Event, desc="Region directory events") {
+    SendInv,        desc="Send inv message to any machine that has a region buffer";
+    SendUpgrade,    desc="Send upgrade message to any machine that has a region buffer";
+    SendDowngrade,  desc="Send downgrade message to any machine that has a region buffer";
+
+    Evict,          desc="Evict this region";
+
+    UpgradeRequest, desc="Request from r-buf for an upgrade";
+    SharedRequest,  desc="Request from r-buf for read";
+    PrivateRequest, desc="Request from r-buf for write";
+
+    InvAckCore,     desc="Ack from region buffer to order the invalidate";
+    InvAckCoreNoShare,     desc="Ack from region buffer to order the invalidate, and it does not have the region";
+    CPUPrivateAck,  desc="Ack from region buffer to order private notification";
+
+    LastAck,      desc="Done eviciting all the blocks";
+
+    StaleCleanWbRequest, desc="stale clean writeback reqeust";
+    StaleCleanWbRequestNoShare, desc="stale clean wb req from a cache which should be removed from sharers";
+    CleanWbRequest, desc="clean writeback reqeust, multiple sharers";
+    CleanWbRequest_LastSharer, desc="clean writeback reqeust, last sharer";
+    WritebackAck,   desc="Writeback Ack from region buffer";
+    DirReadyAck,   desc="Directory is ready, waiting Ack from region buffer";
+
+    TriggerInv,   desc="trigger invalidate message";
+    TriggerDowngrade, desc="trigger downgrade message";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    DataArrayRead,    desc="Read the data array";
+    DataArrayWrite,   desc="Write the data array";
+    TagArrayRead,     desc="Read the data array";
+    TagArrayWrite,    desc="Write the data array";
+  }
+
+  structure(BoolVec, external="yes") {
+    bool at(int);
+    void resize(int);
+    void clear();
+  }
+
+  structure(Entry, desc="Region entry", interface="AbstractCacheEntry") {
+    Addr addr,        desc="Base address of this region";
+    NetDest Sharers,        desc="Set of machines that are sharing, but not owners";
+    State RegionState,      desc="Region state";
+    DataBlock DataBlk,      desc="Data for the block (always empty in region dir)";
+    MachineID Owner,        desc="Machine which owns all blocks in this region";
+    Cycles ProbeStart,        desc="Time when the first probe request was issued";
+    bool LastWriten, default="false", desc="The last time someone accessed this region, it wrote it";
+    bool LastWritenByCpu, default="false", desc="The last time the CPU accessed this region, it wrote it";
+    bool LastWritenByGpu, default="false", desc="The last time the GPU accessed this region, it wrote it";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,         desc="Transient state";
+    MachineID Owner,        desc="Machine which owns all blocks in this region";
+    NetDest Sharers,        desc="Set of machines to send evicts";
+    int NumValidBlocks,     desc="Number of blocks valid so we don't have to count a BoolVec";
+    bool AllAcksReceived,   desc="Got all necessary acks from dir";
+    CoherenceRequestType MsgType, desc="Msg type for the evicts could be inv or dwngrd";
+    Cycles ProbeRequestTime, default="Cycles(0)", desc="Start of probe request";
+    Cycles InitialRequestTime, default="Cycles(0)", desc="To forward back on out msg";
+    Addr DemandAddress, desc="Demand address from original request";
+    uint64_t probe_id,        desc="probe id for lifetime profiling";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  // Stores only region addresses
+  TBETable TBEs, template="<RegionDir_TBE>", constructor="m_number_of_TBEs";
+  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  void set_cache_entry(AbstractCacheEntry b);
+  void unset_cache_entry();
+  void set_tbe(TBE b);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  int blockBits,  default="RubySystem::getBlockSizeBits()";
+  int blockBytes, default="RubySystem::getBlockSizeBytes()";
+  int regionBits, default="log2(m_blocksPerRegion)";
+
+  // Functions
+
+  MachineID getCoreMachine(MachineID rBuf, Addr address) {
+    if (machineIDToNodeID(rBuf) == cpuRegionBufferNum) {
+      return createMachineID(MachineType:CorePair, intToID(0));
+    } else if (machineIDToNodeID(rBuf) == gpuRegionBufferNum) {
+      if (noTCCdir) {
+        return mapAddressToRange(address,MachineType:TCC,
+                                    TCC_select_low_bit, TCC_select_num_bits);
+      } else {
+        return createMachineID(MachineType:TCCdir, intToID(0));
+      }
+    } else {
+      error("Unexpected region buffer number");
+    }
+  }
+
+  bool isCpuMachine(MachineID rBuf) {
+    if (machineIDToNodeID(rBuf) == cpuRegionBufferNum) {
+      return true;
+    } else if (machineIDToNodeID(rBuf) == gpuRegionBufferNum) {
+      return false;
+    } else {
+      error("Unexpected region buffer number");
+    }
+  }
+
+  bool symMigrate(Entry cache_entry) {
+      return cache_entry.LastWriten;
+  }
+
+  bool asymMigrate(Entry cache_entry, MachineID requestor) {
+      if (isCpuMachine(requestor)) {
+          return cache_entry.LastWritenByCpu;
+      } else {
+          return cache_entry.LastWritenByGpu;
+      }
+  }
+
+  int getRegionOffset(Addr addr) {
+    if (blocksPerRegion > 1) {
+      Addr offset := bitSelect(addr, blockBits, regionBits+blockBits-1);
+      int ret := addressToInt(offset);
+      assert(ret < blocksPerRegion);
+      return ret;
+    } else {
+      return 0;
+    }
+  }
+
+  Addr getRegionBase(Addr addr) {
+    return maskLowOrderBits(addr, blockBits+regionBits);
+  }
+
+  Addr getNextBlock(Addr addr) {
+    Addr a := addr;
+    makeNextStrideAddress(a, 1);
+    return a;
+  }
+
+  bool presentOrAvail(Addr addr) {
+    DPRINTF(RubySlicc, "Present? %s, avail? %s\n", cacheMemory.isTagPresent(getRegionBase(addr)), cacheMemory.cacheAvail(getRegionBase(addr)));
+    return cacheMemory.isTagPresent(getRegionBase(addr)) || cacheMemory.cacheAvail(getRegionBase(addr));
+  }
+
+  // Returns a region entry!
+  Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", cacheMemory.lookup(getRegionBase(addr)));
+  }
+
+  TBE getTBE(Addr addr), return_by_pointer="yes" {
+    return TBEs.lookup(getRegionBase(addr));
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    return getCacheEntry(getRegionBase(addr)).DataBlk;
+  }
+
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    } else if (is_valid(cache_entry)) {
+      return cache_entry.RegionState;
+    }
+    return State:NP;
+  }
+
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    if (is_valid(tbe)) {
+        tbe.TBEState := state;
+    }
+    if (is_valid(cache_entry)) {
+        cache_entry.RegionState := state;
+    }
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := getTBE(addr);
+    if(is_valid(tbe)) {
+      return RegionDir_State_to_permission(tbe.TBEState);
+    }
+    Entry cache_entry := getCacheEntry(addr);
+    if(is_valid(cache_entry)) {
+      return RegionDir_State_to_permission(cache_entry.RegionState);
+    }
+    return AccessPermission:NotPresent;
+  }
+
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(RegionDir_State_to_permission(state));
+    }
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    functionalMemoryRead(pkt);
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    if (functionalMemoryWrite(pkt)) {
+      return 1;
+    } else {
+      return 0;
+    }
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      cacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      cacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      cacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      cacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:DataArrayRead) {
+      return cacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:DataArrayWrite) {
+      return cacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:TagArrayRead) {
+      return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:TagArrayWrite) {
+      return cacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+
+  out_port(requestNetwork_out, CPURequestMsg, requestToDir);
+  out_port(notifyNetwork_out, CPURequestMsg, notifyToRBuffer);
+  out_port(probeNetwork_out, NBProbeRequestMsg, probeToRBuffer);
+
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=2) {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        assert(in_msg.addr == getRegionBase(in_msg.addr));
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        TBE tbe := getTBE(in_msg.addr);
+        DPRINTF(RubySlicc, "trigger msg: %s (%s)\n", in_msg, getRegionBase(in_msg.addr));
+        if (in_msg.Type == TriggerType:AcksComplete) {
+          assert(is_valid(tbe));
+          trigger(Event:LastAck, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == TriggerType:InvRegion) {
+          assert(is_valid(tbe));
+          trigger(Event:TriggerInv, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == TriggerType:DowngradeRegion) {
+          assert(is_valid(tbe));
+          trigger(Event:TriggerDowngrade, in_msg.addr, cache_entry, tbe);
+        } else {
+          error("Unknown trigger message");
+        }
+      }
+    }
+  }
+
+  in_port(responseNetwork_in, ResponseMsg, responseFromRBuffer, rank=1) {
+    if (responseNetwork_in.isReady(clockEdge())) {
+      peek(responseNetwork_in, ResponseMsg) {
+        TBE tbe := getTBE(in_msg.addr);
+        Entry cache_entry := getCacheEntry(in_msg.addr);
+        if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+          assert(in_msg.addr == getRegionBase(in_msg.addr));
+          assert(is_valid(tbe));
+          if (in_msg.NotCached) {
+            trigger(Event:InvAckCoreNoShare, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:InvAckCore, in_msg.addr, cache_entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceResponseType:PrivateAck) {
+          assert(in_msg.addr == getRegionBase(in_msg.addr));
+          assert(is_valid(cache_entry));
+          //Fix Me...add back in: assert(cache_entry.Sharers.isElement(in_msg.Sender));
+          trigger(Event:CPUPrivateAck, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceResponseType:RegionWbAck) {
+            //Fix Me...add back in: assert(cache_entry.Sharers.isElement(in_msg.Sender) == false);
+          assert(in_msg.addr == getRegionBase(in_msg.addr));
+          trigger(Event:WritebackAck, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceResponseType:DirReadyAck) {
+            assert(is_valid(tbe));
+            trigger(Event:DirReadyAck, getRegionBase(in_msg.addr), cache_entry, tbe);
+        } else {
+          error("Invalid response type");
+        }
+      }
+    }
+  }
+
+  // In from cores
+  // NOTE: We get the cache / TBE entry based on the region address,
+  //       but pass the block address to the actions
+  in_port(requestNetwork_in, CPURequestMsg, requestFromRegBuf, rank=0) {
+    if (requestNetwork_in.isReady(clockEdge())) {
+      peek(requestNetwork_in, CPURequestMsg) {
+        //assert(in_msg.addr == getRegionBase(in_msg.addr));
+        Addr address := getRegionBase(in_msg.addr);
+        DPRINTF(RubySlicc, "Got %s, base %s\n", in_msg.addr, address);
+        if (presentOrAvail(address)) {
+          TBE tbe := getTBE(address);
+          Entry cache_entry := getCacheEntry(address);
+          if (in_msg.Type == CoherenceRequestType:PrivateRequest) {
+            if (is_valid(cache_entry) && (cache_entry.Owner != in_msg.Requestor ||
+                getState(tbe, cache_entry, address) == State:S)) {
+              trigger(Event:SendInv, address, cache_entry, tbe);
+            } else {
+              trigger(Event:PrivateRequest, address, cache_entry, tbe);
+            }
+          } else if (in_msg.Type == CoherenceRequestType:SharedRequest) {
+            if (is_invalid(cache_entry)) {
+              // If no one has ever requested this region give private permissions
+              trigger(Event:PrivateRequest, address, cache_entry, tbe);
+            } else {
+                if (always_migrate ||
+                    (sym_migrate && symMigrate(cache_entry)) ||
+                    (asym_migrate && asymMigrate(cache_entry, in_msg.Requestor))) {
+                    if (cache_entry.Sharers.count() == 1 &&
+                        cache_entry.Sharers.isElement(in_msg.Requestor)) {
+                        trigger(Event:UpgradeRequest, address, cache_entry, tbe);
+                    } else {
+                        trigger(Event:SendInv, address, cache_entry, tbe);
+                    }
+                } else { // don't migrate
+                    if(cache_entry.Sharers.isElement(in_msg.Requestor) ||
+                       getState(tbe, cache_entry, address) == State:S) {
+                        trigger(Event:SharedRequest, address, cache_entry, tbe);
+                    } else {
+                        trigger(Event:SendDowngrade, address, cache_entry, tbe);
+                    }
+                }
+            }
+          } else if (in_msg.Type == CoherenceRequestType:UpgradeRequest) {
+            if (is_invalid(cache_entry)) {
+              trigger(Event:PrivateRequest, address, cache_entry, tbe);
+            } else if (cache_entry.Sharers.count() == 1 && cache_entry.Sharers.isElement(in_msg.Requestor)) {
+              trigger(Event:UpgradeRequest, address, cache_entry, tbe);
+            } else {
+              trigger(Event:SendUpgrade, address, cache_entry, tbe);
+            }
+          } else if (in_msg.Type == CoherenceRequestType:CleanWbRequest) {
+            if (is_invalid(cache_entry) || cache_entry.Sharers.isElement(in_msg.Requestor) == false) {
+              trigger(Event:StaleCleanWbRequest, address, cache_entry, tbe);
+            } else {
+                DPRINTF(RubySlicc, "wb address %s(%s) owner %s sharers %s requestor %s %d %d\n", in_msg.addr, getRegionBase(in_msg.addr), cache_entry.Owner, cache_entry.Sharers, in_msg.Requestor, cache_entry.Sharers.isElement(in_msg.Requestor), cache_entry.Sharers.count());
+                if (cache_entry.Sharers.isElement(in_msg.Requestor) && cache_entry.Sharers.count() == 1) {
+                    DPRINTF(RubySlicc, "last wb\n");
+                    trigger(Event:CleanWbRequest_LastSharer, address, cache_entry, tbe);
+                } else {
+                    DPRINTF(RubySlicc, "clean wb\n");
+                    trigger(Event:CleanWbRequest, address, cache_entry, tbe);
+                }
+            }
+          } else {
+            error("unknown region dir request type");
+          }
+        } else {
+          Addr victim := cacheMemory.cacheProbe(getRegionBase(in_msg.addr));
+          TBE victim_tbe := getTBE(victim);
+          Entry victim_entry := getCacheEntry(victim);
+          DPRINTF(RubySlicc, "Evicting address %s for new region at address %s(%s)\n", victim, in_msg.addr, getRegionBase(in_msg.addr));
+          assert(is_valid(victim_entry));
+          trigger(Event:Evict, victim, victim_entry, victim_tbe);
+        }
+      }
+    }
+  }
+
+  // Actions
+
+  action(f_fwdReqToDir, "f", desc="Forward CPU request to directory") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) {
+        out_msg.addr := in_msg.addr;  // This is the block address. "address" is the region address
+        out_msg.Type := in_msg.OriginalType;
+        out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.Dirty := in_msg.Dirty;
+        out_msg.Requestor := getCoreMachine(in_msg.Requestor,address);
+        out_msg.WTRequestor := in_msg.WTRequestor;
+        out_msg.Destination.add(map_Address_to_Directory(in_msg.addr));
+        out_msg.Shared := in_msg.Shared;
+        out_msg.MessageSize := in_msg.MessageSize;
+        out_msg.Private := in_msg.Private;
+        out_msg.NoAckNeeded := true;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ProbeRequestStartTime := curCycle();
+        out_msg.DemandRequest := true;
+        if (is_valid(cache_entry) && getState(tbe, cache_entry, address) != State:S) {
+            out_msg.Acks := cache_entry.Sharers.count();
+        } else {
+            out_msg.Acks := 0;
+        }
+      }
+    }
+  }
+
+  action(f_fwdReqToDirShared, "fs", desc="Forward CPU request to directory (shared)") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) {
+        out_msg.addr := in_msg.addr;  // This is the block address. "address" is the region address
+        out_msg.Type := in_msg.OriginalType;
+        out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.Dirty := in_msg.Dirty;
+        out_msg.Requestor := getCoreMachine(in_msg.Requestor,address);
+        out_msg.WTRequestor := in_msg.WTRequestor;
+        out_msg.Destination.add(map_Address_to_Directory(in_msg.addr));
+        out_msg.Shared := in_msg.Shared;
+        out_msg.MessageSize := in_msg.MessageSize;
+        out_msg.Private := in_msg.Private;
+        out_msg.NoAckNeeded := true;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ProbeRequestStartTime := curCycle();
+        out_msg.DemandRequest := true;
+        out_msg.ForceShared := true;
+        if (is_valid(cache_entry) && getState(tbe, cache_entry, address) != State:S) {
+            out_msg.Acks := cache_entry.Sharers.count();
+        } else {
+            out_msg.Acks := 0;
+        }
+      }
+    }
+  }
+
+  action(f_fwdReqToDirWithAck, "fa", desc="Forward CPU request to directory with ack request") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) {
+        out_msg.addr := in_msg.addr; // This is the block address. "address" is the region address
+        out_msg.Type := in_msg.OriginalType;
+        out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.Dirty := in_msg.Dirty;
+        out_msg.Requestor := getCoreMachine(in_msg.Requestor,address);
+        out_msg.WTRequestor := in_msg.WTRequestor;
+        out_msg.Destination.add(map_Address_to_Directory(in_msg.addr));
+        out_msg.Shared := in_msg.Shared;
+        out_msg.MessageSize := in_msg.MessageSize;
+        out_msg.Private := in_msg.Private;
+        out_msg.NoAckNeeded := false;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ProbeRequestStartTime := curCycle();
+        out_msg.DemandRequest := true;
+        if (is_valid(cache_entry)) {
+            out_msg.Acks := cache_entry.Sharers.count();
+            // Don't need an ack from the requestor!
+            if (cache_entry.Sharers.isElement(in_msg.Requestor)) {
+                out_msg.Acks := out_msg.Acks - 1;
+            }
+        } else {
+            out_msg.Acks := 0;
+        }
+      }
+    }
+  }
+
+  action(f_fwdReqToDirWithAckShared, "fas", desc="Forward CPU request to directory with ack request") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(requestNetwork_out, CPURequestMsg, toDirLatency) {
+        out_msg.addr := in_msg.addr; // This is the block address. "address" is the region address
+        out_msg.Type := in_msg.OriginalType;
+        out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.Dirty := in_msg.Dirty;
+        out_msg.Requestor := getCoreMachine(in_msg.Requestor,address);
+        out_msg.WTRequestor := in_msg.WTRequestor;
+        out_msg.Destination.add(map_Address_to_Directory(in_msg.addr));
+        out_msg.Shared := in_msg.Shared;
+        out_msg.MessageSize := in_msg.MessageSize;
+        out_msg.Private := in_msg.Private;
+        out_msg.NoAckNeeded := false;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ProbeRequestStartTime := curCycle();
+        out_msg.DemandRequest := true;
+        out_msg.ForceShared := true;
+        if (is_valid(cache_entry)) {
+            out_msg.Acks := cache_entry.Sharers.count();
+            // Don't need an ack from the requestor!
+            if (cache_entry.Sharers.isElement(in_msg.Requestor)) {
+                out_msg.Acks := out_msg.Acks - 1;
+            }
+        } else {
+            out_msg.Acks := 0;
+        }
+      }
+    }
+  }
+
+  action(a_allocateRegionEntry, "a", desc="Allocate a new entry") {
+    set_cache_entry(cacheMemory.allocate(getRegionBase(address), new Entry));
+    peek(requestNetwork_in, CPURequestMsg) {
+      APPEND_TRANSITION_COMMENT(in_msg.Requestor);
+    }
+  }
+
+  action(d_deallocateRegionEntry, "d", desc="Deallocate region entry") {
+    cacheMemory.deallocate(getRegionBase(address));
+    unset_cache_entry();
+  }
+
+  action(ra_receiveAck, "ra", desc="Mark TBE entry as received this ack") {
+    //assert(tbe.ValidBlocks.at(getRegionOffset(address)));
+    DPRINTF(RubySlicc, "received ack for %s reg: %s\n", address, getRegionBase(address));
+    tbe.NumValidBlocks := tbe.NumValidBlocks - 1;
+    assert(tbe.NumValidBlocks >= 0);
+    if (tbe.NumValidBlocks == 0) {
+      tbe.AllAcksReceived := true;
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.Type := TriggerType:AcksComplete;
+        out_msg.addr := address;
+      }
+    }
+    APPEND_TRANSITION_COMMENT(getRegionBase(address));
+    APPEND_TRANSITION_COMMENT(" Acks left receive ");
+    APPEND_TRANSITION_COMMENT(tbe.NumValidBlocks);
+  }
+
+  action(ca_checkAcks, "ca", desc="Check to see if we need more acks") {
+    if (tbe.NumValidBlocks == 0) {
+      tbe.AllAcksReceived := true;
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.Type := TriggerType:AcksComplete;
+        out_msg.addr := address;
+      }
+    }
+  }
+
+  action(ti_triggerInv, "ti", desc="") {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+          out_msg.Type := TriggerType:InvRegion;
+          out_msg.addr := address;
+      }
+  }
+
+  action(td_triggerDowngrade, "td", desc="") {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.Type := TriggerType:DowngradeRegion;
+        out_msg.addr := address;
+      }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    TBEs.allocate(getRegionBase(address));
+    set_tbe(getTBE(address));
+    if (is_valid(cache_entry)) {
+      tbe.Owner := cache_entry.Owner;
+      tbe.Sharers := cache_entry.Sharers;
+      tbe.AllAcksReceived := true; // assume no acks are required
+    }
+    tbe.ProbeRequestTime := curCycle();
+    peek(requestNetwork_in, CPURequestMsg) {
+      tbe.InitialRequestTime := in_msg.InitialRequestTime;
+      tbe.DemandAddress := in_msg.addr;
+    }
+    APPEND_TRANSITION_COMMENT(getRegionBase(address));
+    APPEND_TRANSITION_COMMENT(" Acks left ");
+    APPEND_TRANSITION_COMMENT(tbe.NumValidBlocks);
+    APPEND_TRANSITION_COMMENT(" Owner, ");
+    APPEND_TRANSITION_COMMENT(tbe.Owner);
+    APPEND_TRANSITION_COMMENT(" sharers, ");
+    APPEND_TRANSITION_COMMENT(tbe.Sharers);
+  }
+
+  action(ss_setSharers, "ss", desc="Add requestor to sharers") {
+    peek(requestNetwork_in, CPURequestMsg) {
+        cache_entry.Sharers.add(in_msg.Requestor);
+        APPEND_TRANSITION_COMMENT(cache_entry.Sharers);
+    }
+  }
+
+  action(rs_removeSharer, "rs", desc="Remove requestor to sharers") {
+    peek(requestNetwork_in, CPURequestMsg) {
+        cache_entry.Sharers.remove(in_msg.Requestor);
+        APPEND_TRANSITION_COMMENT(" removing ");
+        APPEND_TRANSITION_COMMENT(in_msg.Requestor);
+        APPEND_TRANSITION_COMMENT(" sharers ");
+        APPEND_TRANSITION_COMMENT(cache_entry.Sharers);
+    }
+  }
+
+  action(rsr_removeSharerResponse, "rsr", desc="Remove requestor to sharers") {
+    peek(responseNetwork_in, ResponseMsg) {
+        cache_entry.Sharers.remove(in_msg.Sender);
+        APPEND_TRANSITION_COMMENT(cache_entry.Sharers);
+    }
+  }
+
+  action(cs_clearSharers, "cs", desc="Add requestor to sharers") {
+    cache_entry.Sharers.clear();
+  }
+
+  action(so_setOwner, "so", desc="Set the owner to the requestor") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      cache_entry.Owner := in_msg.Requestor;
+      APPEND_TRANSITION_COMMENT(" Owner now: ");
+      APPEND_TRANSITION_COMMENT(cache_entry.Owner);
+    }
+  }
+
+  action(rr_removeRequestorFromTBE, "rr", desc="Remove requestor from TBE sharers") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      tbe.Sharers.remove(in_msg.Requestor);
+    }
+  }
+
+  action(ur_updateDirtyStatusOnRequest, "ur", desc="Update dirty status on demand request") {
+      peek(requestNetwork_in, CPURequestMsg) {
+          if (is_valid(cache_entry)) {
+              if ((in_msg.Type == CoherenceRequestType:SharedRequest) &&
+                  (cache_entry.Sharers.isElement(in_msg.Requestor) == false)) {
+                  cache_entry.LastWriten := false;
+                  if (isCpuMachine(in_msg.Requestor)) {
+                      cache_entry.LastWritenByCpu := false;
+                  } else {
+                      cache_entry.LastWritenByGpu := false;
+                  }
+              } else if ((in_msg.Type == CoherenceRequestType:PrivateRequest) ||
+                         (in_msg.Type == CoherenceRequestType:UpgradeRequest)) {
+                  cache_entry.LastWriten := true;
+                  if (isCpuMachine(in_msg.Requestor)) {
+                      cache_entry.LastWritenByCpu := true;
+                  } else {
+                      cache_entry.LastWritenByGpu := true;
+                  }
+              }
+          }
+      }
+  }
+
+  action(ud_updateDirtyStatusWithWb, "ud", desc="Update dirty status on writeback") {
+      peek(requestNetwork_in, CPURequestMsg) {
+          if (is_valid(cache_entry) && in_msg.Dirty) {
+              cache_entry.LastWriten := true;
+              if (isCpuMachine(in_msg.Requestor)) {
+                  cache_entry.LastWritenByCpu := true;
+              } else {
+                  cache_entry.LastWritenByGpu := true;
+              }
+          }
+      }
+  }
+
+  action(sns_setNumAcksSharers, "sns", desc="Set number of acks to one per shared region buffer") {
+    assert(is_valid(tbe));
+    assert(is_valid(cache_entry));
+    tbe.NumValidBlocks := tbe.Sharers.count();
+  }
+
+  action(sno_setNumAcksOne, "sno", desc="Set number of acks to one per shared region buffer") {
+    assert(is_valid(tbe));
+    assert(is_valid(cache_entry));
+    tbe.NumValidBlocks := 1;
+  }
+
+  action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+    TBEs.deallocate(getRegionBase(address));
+    APPEND_TRANSITION_COMMENT(" reg: ");
+    APPEND_TRANSITION_COMMENT(getRegionBase(address));
+    unset_tbe();
+  }
+
+  action(wb_sendWbNotice, "wb", desc="Send notice to cache that writeback is acknowledged") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+        out_msg.addr := getRegionBase(address);
+        out_msg.Type := CoherenceRequestType:WbNotify;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.Requestor := machineID;
+        out_msg.MessageSize := MessageSizeType:Request_Control;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+      }
+    }
+  }
+
+  action(wbn_sendWbNoticeNoAck, "wbn", desc="Send notice to cache that writeback is acknowledged (no ack needed)") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+        out_msg.addr := getRegionBase(address);
+        out_msg.Type := CoherenceRequestType:WbNotify;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.Requestor := machineID;
+        out_msg.MessageSize := MessageSizeType:Request_Control;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.NoAckNeeded := true;
+      }
+    }
+  }
+
+  action(b_sendPrivateNotice, "b", desc="Send notice to private cache that it has private access") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+        out_msg.addr := getRegionBase(address);
+        out_msg.Type := CoherenceRequestType:PrivateNotify;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.Requestor := machineID;
+        out_msg.MessageSize := MessageSizeType:Request_Control;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+      }
+    }
+  }
+
+  action(bs_sendSharedNotice, "bs", desc="Send notice to private cache that it has private access") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+        out_msg.addr := getRegionBase(address);
+        out_msg.Type := CoherenceRequestType:SharedNotify;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.Requestor := machineID;
+        out_msg.MessageSize := MessageSizeType:Request_Control;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+      }
+    }
+  }
+
+  action(c_sendSharedNoticeToOrigReq, "c", desc="Send notice to private cache that it has shared access") {
+    assert(is_valid(tbe));
+    enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+      out_msg.addr := getRegionBase(address);
+      out_msg.Type := CoherenceRequestType:SharedNotify;
+      out_msg.Destination.add(tbe.Owner);
+      out_msg.Requestor := machineID;
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.ProbeRequestStartTime := tbe.ProbeRequestTime;
+      out_msg.InitialRequestTime := tbe.InitialRequestTime;
+      APPEND_TRANSITION_COMMENT("dest: ");
+      APPEND_TRANSITION_COMMENT(out_msg.Destination);
+    }
+  }
+
+  action(sp_sendPrivateNoticeToOrigReq, "sp", desc="Send notice to private cache that it has private access") {
+    assert(is_valid(tbe));
+    enqueue(notifyNetwork_out, CPURequestMsg, 1) {
+      out_msg.addr := getRegionBase(address);
+      out_msg.Type := CoherenceRequestType:PrivateNotify;
+      out_msg.Destination.add(tbe.Owner);
+      out_msg.Requestor := machineID;
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.ProbeRequestStartTime := tbe.ProbeRequestTime;
+      out_msg.InitialRequestTime := tbe.InitialRequestTime;
+      APPEND_TRANSITION_COMMENT("dest: ");
+      APPEND_TRANSITION_COMMENT(out_msg.Destination);
+    }
+  }
+
+  action(i_RegionInvNotify, "i", desc="Send notice to private cache that it no longer has private access") {
+      enqueue(probeNetwork_out, NBProbeRequestMsg, 1) {
+          out_msg.addr := address;
+          out_msg.DemandAddress := tbe.DemandAddress;
+          //out_msg.Requestor := tbe.Requestor;
+          out_msg.Requestor := machineID;
+          out_msg.Type := ProbeRequestType:PrbInv;
+          //Fix me: assert(tbe.Sharers.count() > 0);
+          out_msg.DemandRequest := true;
+          out_msg.Destination := tbe.Sharers;
+          out_msg.MessageSize := MessageSizeType:Request_Control;
+          APPEND_TRANSITION_COMMENT("dest: ");
+          APPEND_TRANSITION_COMMENT(out_msg.Destination);
+      }
+  }
+
+  action(i0_RegionInvNotifyDemand0, "i0", desc="Send notice to private cache that it no longer has private access") {
+      enqueue(probeNetwork_out, NBProbeRequestMsg, 1) {
+          out_msg.addr := address;
+          // Demand address should default to 0 -> out_msg.DemandAddress := 0;
+          out_msg.Requestor := machineID;
+          out_msg.Type := ProbeRequestType:PrbInv;
+          out_msg.Destination := tbe.Sharers;
+          out_msg.MessageSize := MessageSizeType:Request_Control;
+          APPEND_TRANSITION_COMMENT("dest: ");
+          APPEND_TRANSITION_COMMENT(out_msg.Destination);
+      }
+  }
+
+  action(rd_RegionDowngrade, "rd", desc="Send notice to private cache that it only has shared access") {
+        enqueue(probeNetwork_out, NBProbeRequestMsg, 1) {
+            out_msg.addr := address;
+            out_msg.DemandAddress := tbe.DemandAddress;
+            out_msg.Requestor := machineID;
+            out_msg.Type := ProbeRequestType:PrbDowngrade;
+            out_msg.DemandRequest := true;
+            out_msg.Destination := tbe.Sharers;
+            out_msg.MessageSize := MessageSizeType:Request_Control;
+            APPEND_TRANSITION_COMMENT("dest: ");
+            APPEND_TRANSITION_COMMENT(out_msg.Destination);
+        }
+  }
+
+  action(p_popRequestQueue, "p", desc="Pop the request queue") {
+    requestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pt_popTriggerQueue, "pt", desc="Pop the trigger queue") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="Pop the response queue") {
+    responseNetwork_in.dequeue(clockEdge());
+  }
+
+  action(s_stallAndWaitRequest, "s", desc="Stall and wait on the region address") {
+    Addr regAddr := getRegionBase(address);
+    stall_and_wait(requestNetwork_in, regAddr);
+  }
+
+  action(w_wakeUpRegionDependents, "w", desc="Wake up any requests waiting for this region") {
+    wakeUpBuffers(getRegionBase(address));
+  }
+
+  action(wa_wakeUpAllDependents, "wa", desc="Wake up any requests waiting for this region") {
+    wakeUpAllBuffers();
+  }
+
+  action(zz_recycleRequestQueue, "\z", desc="...") {
+    requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(z_stall, "z", desc="stall request queue") {
+    // fake state
+  }
+
+  action(mru_setMRU, "mru", desc="set MRU") {
+    cacheMemory.setMRU(address);
+  }
+
+ // Transistions
+
+  transition({NP_P, P_P, NP_S, S_S, S_P, P_S, P_NP, S_AP, P_AS, P_AP, SP_NP_W, S_W, P_AP_W, P_AS_W, S_AP_W}, {PrivateRequest, SharedRequest, UpgradeRequest, SendInv, SendUpgrade, SendDowngrade, CleanWbRequest, CleanWbRequest_LastSharer, StaleCleanWbRequest}) {
+    s_stallAndWaitRequest
+  }
+
+  transition({NP_P, P_P, NP_S, S_S, S_P, S_W, P_S, P_NP, S_AP, P_AS, P_AP, P_AP_W, P_AS_W, S_AP_W}, Evict) {
+    zz_recycleRequestQueue;
+  }
+
+  transition(NP, {PrivateRequest, SendUpgrade}, NP_P) {TagArrayRead, TagArrayWrite} {
+    a_allocateRegionEntry;
+    ur_updateDirtyStatusOnRequest;
+    f_fwdReqToDir;
+    b_sendPrivateNotice;
+    so_setOwner;
+    ss_setSharers;
+    t_allocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition(P, {PrivateRequest, UpgradeRequest}, P_P) {TagArrayRead} {
+    mru_setMRU;
+    ur_updateDirtyStatusOnRequest;
+    f_fwdReqToDir;
+    b_sendPrivateNotice;
+    t_allocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition({NP_P, P_P}, CPUPrivateAck, P) {
+    dt_deallocateTBE;
+    w_wakeUpRegionDependents;
+    pr_popResponseQueue;
+  }
+
+  transition({NP, P, S}, StaleCleanWbRequest) {TagArrayRead, TagArrayWrite} {
+      wbn_sendWbNoticeNoAck;
+      ud_updateDirtyStatusWithWb;
+      p_popRequestQueue;
+  }
+
+  transition(NP, SharedRequest, NP_S) {TagArrayRead, TagArrayWrite} {
+    a_allocateRegionEntry;
+    ur_updateDirtyStatusOnRequest;
+    f_fwdReqToDirShared;
+    bs_sendSharedNotice;
+    so_setOwner;
+    ss_setSharers;
+    t_allocateTBE;
+    p_popRequestQueue;
+  }
+
+  // Could probably do this in parallel with other shared requests
+  transition(S, SharedRequest, S_S) {TagArrayRead, TagArrayWrite} {
+    mru_setMRU;
+    ur_updateDirtyStatusOnRequest;
+    f_fwdReqToDirShared;
+    bs_sendSharedNotice;
+    ss_setSharers;
+    t_allocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition({P, S}, CleanWbRequest_LastSharer, SP_NP_W) {TagArrayRead, TagArrayWrite} {
+    ud_updateDirtyStatusWithWb;
+    wb_sendWbNotice;
+    rs_removeSharer;
+    t_allocateTBE;
+    d_deallocateRegionEntry;
+    p_popRequestQueue;
+  }
+
+  transition(S, CleanWbRequest, S_W) {TagArrayRead, TagArrayWrite} {
+    ud_updateDirtyStatusWithWb;
+    wb_sendWbNotice;
+    rs_removeSharer;
+    t_allocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition(SP_NP_W, WritebackAck, NP) {
+    dt_deallocateTBE;
+    w_wakeUpRegionDependents;
+    pr_popResponseQueue;
+  }
+
+  transition(S_W, WritebackAck, S) {
+    dt_deallocateTBE;
+    w_wakeUpRegionDependents;
+    pr_popResponseQueue;
+  }
+
+  transition({NP_S, S_S}, CPUPrivateAck, S) {
+    dt_deallocateTBE;
+    w_wakeUpRegionDependents;
+    pr_popResponseQueue;
+  }
+
+  transition(S, UpgradeRequest, S_P) {TagArrayRead, TagArrayWrite} {
+    mru_setMRU;
+    ur_updateDirtyStatusOnRequest;
+    f_fwdReqToDir;
+    b_sendPrivateNotice;
+    so_setOwner;
+    t_allocateTBE;
+    p_popRequestQueue;
+  }
+
+  transition(S_P, CPUPrivateAck, P) {
+    dt_deallocateTBE;
+    w_wakeUpRegionDependents;
+    pr_popResponseQueue;
+  }
+
+  transition(P, SendInv, P_AP_W) {TagArrayRead, TagArrayWrite} {
+    mru_setMRU;
+    ur_updateDirtyStatusOnRequest;
+    f_fwdReqToDirWithAck;
+    so_setOwner;
+    t_allocateTBE;
+    rr_removeRequestorFromTBE;
+    sns_setNumAcksSharers;
+    cs_clearSharers;
+    ss_setSharers;
+    //i_RegionInvNotify;
+    p_popRequestQueue;
+  }
+
+  transition({P_AP_W, S_AP_W}, DirReadyAck) {
+      ti_triggerInv;
+      pr_popResponseQueue;
+  }
+
+  transition(P_AS_W, DirReadyAck) {
+      td_triggerDowngrade;
+      pr_popResponseQueue;
+  }
+
+  transition(P_AS_W, TriggerDowngrade, P_AS) {
+      rd_RegionDowngrade;
+      pt_popTriggerQueue;
+  }
+
+  transition(P_AP_W, TriggerInv, P_AP) {
+      i_RegionInvNotify;
+      pt_popTriggerQueue;
+  }
+
+  transition(S_AP_W, TriggerInv, S_AP) {
+      i_RegionInvNotify;
+      pt_popTriggerQueue;
+  }
+
+  transition(P, SendUpgrade, P_AP_W) {TagArrayRead, TagArrayWrite} {
+    mru_setMRU;
+    ur_updateDirtyStatusOnRequest;
+    f_fwdReqToDirWithAck;
+    so_setOwner;
+    t_allocateTBE;
+    rr_removeRequestorFromTBE;
+    sns_setNumAcksSharers;
+    cs_clearSharers;
+    ss_setSharers;
+    p_popRequestQueue;
+  }
+
+  transition(P, Evict, P_NP) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    sns_setNumAcksSharers;
+    i0_RegionInvNotifyDemand0;
+    d_deallocateRegionEntry;
+  }
+
+  transition(S, SendInv, P_AP_W) {TagArrayRead, TagArrayWrite} {
+    mru_setMRU;
+    ur_updateDirtyStatusOnRequest;
+    f_fwdReqToDirWithAck;
+    so_setOwner;
+    t_allocateTBE;
+    rr_removeRequestorFromTBE;
+    sns_setNumAcksSharers;
+    cs_clearSharers;
+    ss_setSharers;
+    p_popRequestQueue;
+  }
+
+  transition(S, Evict, P_NP) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    sns_setNumAcksSharers;
+    i0_RegionInvNotifyDemand0;
+    d_deallocateRegionEntry;
+  }
+
+  transition(P_NP, LastAck, NP) {
+    dt_deallocateTBE;
+    wa_wakeUpAllDependents;
+    pt_popTriggerQueue;
+  }
+
+  transition(S, SendUpgrade, S_AP_W) {TagArrayRead, TagArrayWrite} {
+    mru_setMRU;
+    ur_updateDirtyStatusOnRequest;
+    f_fwdReqToDirWithAck;
+    so_setOwner;
+    t_allocateTBE;
+    rr_removeRequestorFromTBE;
+    sns_setNumAcksSharers;
+    cs_clearSharers;
+    ss_setSharers;
+    p_popRequestQueue;
+  }
+
+  transition(S_AP, LastAck, S_P) {
+    sp_sendPrivateNoticeToOrigReq;
+    pt_popTriggerQueue;
+  }
+
+  transition(P_AP, LastAck, P_P) {
+    sp_sendPrivateNoticeToOrigReq;
+    pt_popTriggerQueue;
+  }
+
+  transition(P, SendDowngrade, P_AS_W) {TagArrayRead, TagArrayWrite} {
+    mru_setMRU;
+    ur_updateDirtyStatusOnRequest;
+    f_fwdReqToDirWithAckShared;
+    so_setOwner;
+    t_allocateTBE;
+    sns_setNumAcksSharers;
+    ss_setSharers; //why do we set the sharers before sending the downgrade?  Are we sending a downgrade to the requestor?
+    p_popRequestQueue;
+  }
+
+  transition(P_AS, LastAck, P_S) {
+    c_sendSharedNoticeToOrigReq;
+    pt_popTriggerQueue;
+  }
+
+  transition(P_S, CPUPrivateAck, S) {
+    dt_deallocateTBE;
+    w_wakeUpRegionDependents;
+    pr_popResponseQueue;
+  }
+
+  transition({P_NP, P_AS, S_AP, P_AP}, InvAckCore) {} {
+    ra_receiveAck;
+    pr_popResponseQueue;
+  }
+
+  transition({P_NP, S_AP, P_AP}, InvAckCoreNoShare) {} {
+    ra_receiveAck;
+    pr_popResponseQueue;
+  }
+
+  transition(P_AS, InvAckCoreNoShare) {} {
+    ra_receiveAck;
+    rsr_removeSharerResponse;
+    pr_popResponseQueue;
+  }
+
+}
+
+
diff --git a/src/mem/protocol/MOESI_AMD_Base-dir.sm b/src/mem/protocol/MOESI_AMD_Base-dir.sm
new file mode 100644
index 000000000..52cefda66
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-dir.sm
@@ -0,0 +1,1137 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+machine(MachineType:Directory, "AMD Baseline protocol")
+: DirectoryMemory * directory;
+  CacheMemory * L3CacheMemory;
+  Cycles response_latency := 5;
+  Cycles l3_hit_latency := 50;
+  bool noTCCdir := "False";
+  bool CPUonly := "False";
+  int TCC_select_num_bits;
+  bool useL3OnWT := "False";
+  Cycles to_memory_controller_latency := 1;
+
+  // From the Cores
+  MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseFromCores, network="From", virtual_network="2", vnet_type="response";
+  MessageBuffer * unblockFromCores, network="From", virtual_network="4", vnet_type="unblock";
+
+  MessageBuffer * probeToCore, network="To", virtual_network="0", vnet_type="request";
+  MessageBuffer * responseToCore, network="To", virtual_network="2", vnet_type="response";
+
+  MessageBuffer * triggerQueue;
+  MessageBuffer * L3triggerQueue;
+  MessageBuffer * responseFromMemory;
+{
+  // STATES
+  state_declaration(State, desc="Directory states", default="Directory_State_U") {
+    U, AccessPermission:Backing_Store,                 desc="unblocked";
+    BL, AccessPermission:Busy,                  desc="got L3 WB request";
+    // BL is Busy because it's possible for the data only to be in the network
+    // in the WB, L3 has sent it and gone on with its business in possibly I
+    // state.
+    BS_M, AccessPermission:Backing_Store,                 desc="blocked waiting for memory";
+    BM_M, AccessPermission:Backing_Store,                 desc="blocked waiting for memory";
+    B_M, AccessPermission:Backing_Store,                 desc="blocked waiting for memory";
+    BP, AccessPermission:Backing_Store,                 desc="blocked waiting for probes, no need for memory";
+    BS_PM, AccessPermission:Backing_Store,                desc="blocked waiting for probes and Memory";
+    BM_PM, AccessPermission:Backing_Store,                desc="blocked waiting for probes and Memory";
+    B_PM, AccessPermission:Backing_Store,                desc="blocked waiting for probes and Memory";
+    BS_Pm, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    BM_Pm, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    B_Pm, AccessPermission:Backing_Store,                desc="blocked waiting for probes, already got memory";
+    B, AccessPermission:Backing_Store,                  desc="sent response, Blocked til ack";
+  }
+
+  // Events
+  enumeration(Event, desc="Directory events") {
+    // CPU requests
+    RdBlkS,             desc="...";
+    RdBlkM,             desc="...";
+    RdBlk,              desc="...";
+    CtoD,               desc="...";
+    WriteThrough,       desc="WriteThrough Message";
+    Atomic,             desc="Atomic Message";
+
+    // writebacks
+    VicDirty,           desc="...";
+    VicClean,           desc="...";
+    CPUData,            desc="WB data from CPU";
+    StaleWB,         desc="Notification that WB has been superceded by a probe";
+
+    // probe responses
+    CPUPrbResp,            desc="Probe Response Msg";
+
+    ProbeAcksComplete,  desc="Probe Acks Complete";
+
+    L3Hit,              desc="Hit in L3 return data to core";
+
+    // Memory Controller
+    MemData, desc="Fetched data from memory arrives";
+    WBAck, desc="Writeback Ack from memory arrives";
+
+    CoreUnblock,            desc="Core received data, unblock";
+    UnblockWriteThrough,    desc="Unblock because of writethrough request finishing";
+
+    StaleVicDirty,        desc="Core invalidated before VicDirty processed";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    L3DataArrayRead,    desc="Read the data array";
+    L3DataArrayWrite,   desc="Write the data array";
+    L3TagArrayRead,     desc="Read the data array";
+    L3TagArrayWrite,    desc="Write the data array";
+  }
+
+  // TYPES
+
+  // DirectoryEntry
+  structure(Entry, desc="...", interface="AbstractEntry") {
+    State DirectoryState,          desc="Directory state";
+    DataBlock DataBlk,             desc="data for the block";
+    NetDest VicDirtyIgnore,  desc="VicDirty coming from whom to ignore";
+  }
+
+  structure(CacheEntry, desc="...", interface="AbstractCacheEntry") {
+    DataBlock DataBlk,          desc="data for the block";
+    MachineID LastSender,       desc="Mach which this block came from";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,     desc="Transient state";
+    DataBlock DataBlk,  desc="data for the block";
+    bool Dirty,         desc="Is the data dirty?";
+    int NumPendingAcks,        desc="num acks expected";
+    MachineID OriginalRequestor,        desc="Original Requestor";
+    MachineID WTRequestor,        desc="WT Requestor";
+    bool Cached,        desc="data hit in Cache";
+    bool MemData,       desc="Got MemData?",default="false";
+    bool wtData,       desc="Got write through data?",default="false";
+    bool atomicData,   desc="Got Atomic op?",default="false";
+    Cycles InitialRequestTime, desc="...";
+    Cycles ForwardRequestTime, desc="...";
+    Cycles ProbeRequestStartTime, desc="...";
+    MachineID LastSender, desc="Mach which this block came from";
+    bool L3Hit, default="false", desc="Was this an L3 hit?";
+    uint64_t probe_id,        desc="probe id for lifetime profiling";
+    WriteMask writeMask,    desc="outstanding write through mask";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs";
+
+  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  void set_tbe(TBE a);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  Entry getDirectoryEntry(Addr addr), return_by_pointer="yes" {
+    Entry dir_entry := static_cast(Entry, "pointer", directory.lookup(addr));
+
+    if (is_valid(dir_entry)) {
+      return dir_entry;
+    }
+
+    dir_entry :=  static_cast(Entry, "pointer",
+                              directory.allocate(addr, new Entry));
+    return dir_entry;
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if (is_valid(tbe) && tbe.MemData) {
+      DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe);
+      return tbe.DataBlk;
+    }
+    DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr));
+    return getDirectoryEntry(addr).DataBlk;
+  }
+
+  State getState(TBE tbe, CacheEntry entry, Addr addr) {
+    return getDirectoryEntry(addr).DirectoryState;
+  }
+
+  void setState(TBE tbe, CacheEntry entry, Addr addr, State state) {
+    getDirectoryEntry(addr).DirectoryState := state;
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes
+        + functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    // For this Directory, all permissions are just tracked in Directory, since
+    // it's not possible to have something in TBE but not Dir, just keep track
+    // of state all in one place.
+    if (directory.isPresent(addr)) {
+      return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void setAccessPermission(CacheEntry entry, Addr addr, State state) {
+    getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state));
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L3DataArrayRead) {
+        L3CacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L3DataArrayWrite) {
+        L3CacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L3TagArrayRead) {
+        L3CacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L3TagArrayWrite) {
+        L3CacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L3DataArrayRead) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L3DataArrayWrite) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L3TagArrayRead) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L3TagArrayWrite) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+  // ** OUT_PORTS **
+  out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore);
+  out_port(responseNetwork_out, ResponseMsg, responseToCore);
+
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+  out_port(L3TriggerQueue_out, TriggerMsg, L3triggerQueue);
+
+  // ** IN_PORTS **
+
+  // Trigger Queue
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=5) {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == TriggerType:AcksComplete) {
+          trigger(Event:ProbeAcksComplete, in_msg.addr, entry, tbe);
+        }else if (in_msg.Type == TriggerType:UnblockWriteThrough) {
+          trigger(Event:UnblockWriteThrough, in_msg.addr, entry, tbe);
+        } else {
+          error("Unknown trigger msg");
+        }
+      }
+    }
+  }
+
+  in_port(L3TriggerQueue_in, TriggerMsg, L3triggerQueue, rank=4) {
+    if (L3TriggerQueue_in.isReady(clockEdge())) {
+      peek(L3TriggerQueue_in, TriggerMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == TriggerType:L3Hit) {
+          trigger(Event:L3Hit, in_msg.addr, entry, tbe);
+        } else {
+          error("Unknown trigger msg");
+        }
+      }
+    }
+  }
+
+  // Unblock Network
+  in_port(unblockNetwork_in, UnblockMsg, unblockFromCores, rank=3) {
+    if (unblockNetwork_in.isReady(clockEdge())) {
+      peek(unblockNetwork_in, UnblockMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        trigger(Event:CoreUnblock, in_msg.addr, entry, tbe);
+      }
+    }
+  }
+
+  // Core response network
+  in_port(responseNetwork_in, ResponseMsg, responseFromCores, rank=2) {
+    if (responseNetwork_in.isReady(clockEdge())) {
+      peek(responseNetwork_in, ResponseMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+          trigger(Event:CPUPrbResp, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceResponseType:CPUData) {
+          trigger(Event:CPUData, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
+            trigger(Event:StaleWB, in_msg.addr, entry, tbe);
+        } else {
+          error("Unexpected response type");
+        }
+      }
+    }
+  }
+
+  // off-chip memory request/response is done
+  in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=1) {
+    if (memQueue_in.isReady(clockEdge())) {
+      peek(memQueue_in, MemoryMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
+          trigger(Event:MemData, in_msg.addr, entry, tbe);
+          DPRINTF(RubySlicc, "%s\n", in_msg);
+        } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
+          trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them.
+        } else {
+          DPRINTF(RubySlicc, "%s\n", in_msg.Type);
+          error("Invalid message");
+        }
+      }
+    }
+  }
+
+  in_port(requestNetwork_in, CPURequestMsg, requestFromCores, rank=0) {
+    if (requestNetwork_in.isReady(clockEdge())) {
+      peek(requestNetwork_in, CPURequestMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == CoherenceRequestType:RdBlk) {
+          trigger(Event:RdBlk, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+          trigger(Event:RdBlkS, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+          trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+          trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+          trigger(Event:Atomic, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+          if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+            DPRINTF(RubySlicc, "Dropping VicDirty for address %s\n", in_msg.addr);
+            trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+          } else {
+            DPRINTF(RubySlicc, "Got VicDirty from %s on %s\n", in_msg.Requestor, in_msg.addr);
+            trigger(Event:VicDirty, in_msg.addr, entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+          if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+            DPRINTF(RubySlicc, "Dropping VicClean for address %s\n", in_msg.addr);
+            trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+          } else {
+            DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr);
+            trigger(Event:VicClean, in_msg.addr, entry, tbe);
+          }
+        } else {
+          error("Bad request message type");
+        }
+      }
+    }
+  }
+
+  // Actions
+  action(s_sendResponseS, "s", desc="send Shared response") {
+    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:NBSysResp;
+      if (tbe.L3Hit) {
+        out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+      } else {
+        out_msg.Sender := machineID;
+      }
+      out_msg.Destination.add(tbe.OriginalRequestor);
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := false;
+      out_msg.State := CoherenceState:Shared;
+      out_msg.InitialRequestTime := tbe.InitialRequestTime;
+      out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+      out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+      out_msg.OriginalResponder := tbe.LastSender;
+      out_msg.L3Hit := tbe.L3Hit;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(es_sendResponseES, "es", desc="send Exclusive or Shared response") {
+    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:NBSysResp;
+      if (tbe.L3Hit) {
+        out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+      } else {
+        out_msg.Sender := machineID;
+      }
+      out_msg.Destination.add(tbe.OriginalRequestor);
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := tbe.Dirty;
+      if (tbe.Cached) {
+        out_msg.State := CoherenceState:Shared;
+      } else {
+        out_msg.State := CoherenceState:Exclusive;
+      }
+      out_msg.InitialRequestTime := tbe.InitialRequestTime;
+      out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+      out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+      out_msg.OriginalResponder := tbe.LastSender;
+      out_msg.L3Hit := tbe.L3Hit;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(m_sendResponseM, "m", desc="send Modified response") {
+    if (tbe.wtData) {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:UnblockWriteThrough;
+      }
+    }else{
+      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysResp;
+        if (tbe.L3Hit) {
+          out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+        } else {
+          out_msg.Sender := machineID;
+        }
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.Dirty := tbe.Dirty;
+        out_msg.State := CoherenceState:Modified;
+        out_msg.CtoD := false;
+        out_msg.InitialRequestTime := tbe.InitialRequestTime;
+        out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+        out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+        out_msg.OriginalResponder := tbe.LastSender;
+        if(tbe.atomicData){
+          out_msg.WTRequestor := tbe.WTRequestor;
+        }
+        out_msg.L3Hit := tbe.L3Hit;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+      if (tbe.atomicData) {
+        enqueue(triggerQueue_out, TriggerMsg, 1) {
+          out_msg.addr := address;
+          out_msg.Type := TriggerType:UnblockWriteThrough;
+        }
+      }
+    }
+  }
+
+  action(c_sendResponseCtoD, "c", desc="send CtoD Ack") {
+      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        out_msg.Dirty := false;
+        out_msg.State := CoherenceState:Modified;
+        out_msg.CtoD := true;
+        out_msg.InitialRequestTime := tbe.InitialRequestTime;
+        out_msg.ForwardRequestTime := curCycle();
+        out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+  }
+
+  action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysWBAck;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.WTRequestor := in_msg.WTRequestor;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ForwardRequestTime := curCycle();
+        out_msg.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(l_queueMemWBReq, "lq", desc="Write WB data to memory") {
+    peek(responseNetwork_in, ResponseMsg) {
+      queueMemoryWrite(machineID, address, to_memory_controller_latency,
+                       in_msg.DataBlk);
+    }
+  }
+
+  action(l_queueMemRdReq, "lr", desc="Read data from memory") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) {
+          out_msg.addr := address;
+          out_msg.Type := TriggerType:L3Hit;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        if (tbe.Dirty == false) {
+          tbe.DataBlk := entry.DataBlk;
+        }
+        tbe.LastSender := entry.LastSender;
+        tbe.L3Hit := true;
+        tbe.MemData := true;
+        L3CacheMemory.deallocate(address);
+      } else {
+        queueMemoryRead(machineID, address, to_memory_controller_latency);
+      }
+    }
+  }
+
+  action(dc_probeInvCoreData, "dc", desc="probe inv cores, return data") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbInv;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination.broadcast(MachineType:CorePair);  // won't be realistic for multisocket
+
+        // add relevant TCC node to list. This replaces all TCPs and SQCs
+        if (((in_msg.Type == CoherenceRequestType:WriteThrough ||
+              in_msg.Type == CoherenceRequestType:Atomic) &&
+             in_msg.NoWriteConflict) ||
+            CPUonly) {
+        } else if (noTCCdir) {
+          out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                  TCC_select_low_bit, TCC_select_num_bits));
+        } else {
+	      out_msg.Destination.add(mapAddressToRange(address,
+                                                    MachineType:TCCdir,
+                            TCC_select_low_bit, TCC_select_num_bits));
+        }
+        out_msg.Destination.remove(in_msg.Requestor);
+        tbe.NumPendingAcks := out_msg.Destination.count();
+        if (tbe.NumPendingAcks == 0) {
+          enqueue(triggerQueue_out, TriggerMsg, 1) {
+            out_msg.addr := address;
+            out_msg.Type := TriggerType:AcksComplete;
+          }
+        }
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+        APPEND_TRANSITION_COMMENT(" dc: Acks remaining: ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+        tbe.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") {
+    peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+      enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbDowngrade;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination.broadcast(MachineType:CorePair);  // won't be realistic for multisocket
+        // add relevant TCC node to the list. This replaces all TCPs and SQCs
+        if (noTCCdir || CPUonly) {
+          //Don't need to notify TCC about reads
+        } else {
+	      out_msg.Destination.add(mapAddressToRange(address,
+                                                    MachineType:TCCdir,
+                            TCC_select_low_bit, TCC_select_num_bits));
+          tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+        }
+        if (noTCCdir && !CPUonly) {
+          out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                  TCC_select_low_bit, TCC_select_num_bits));
+        }
+        out_msg.Destination.remove(in_msg.Requestor);
+        tbe.NumPendingAcks := out_msg.Destination.count();
+        if (tbe.NumPendingAcks == 0) {
+          enqueue(triggerQueue_out, TriggerMsg, 1) {
+            out_msg.addr := address;
+            out_msg.Type := TriggerType:AcksComplete;
+          }
+        }
+        DPRINTF(RubySlicc, "%s\n", (out_msg));
+        APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+        tbe.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(ic_probeInvCore, "ic", desc="probe invalidate core, no return data needed") {
+    peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+      enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbInv;
+        out_msg.ReturnData := false;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Destination.broadcast(MachineType:CorePair);  // won't be realistic for multisocket
+
+        // add relevant TCC node to the list. This replaces all TCPs and SQCs
+        if (noTCCdir && !CPUonly) {
+            out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                              TCC_select_low_bit, TCC_select_num_bits));
+        } else {
+            if (!noTCCdir) {
+                out_msg.Destination.add(mapAddressToRange(address,
+                                                          MachineType:TCCdir,
+                                                          TCC_select_low_bit,
+                                                          TCC_select_num_bits));
+            }
+        }
+        out_msg.Destination.remove(in_msg.Requestor);
+        tbe.NumPendingAcks := out_msg.Destination.count();
+        if (tbe.NumPendingAcks == 0) {
+          enqueue(triggerQueue_out, TriggerMsg, 1) {
+            out_msg.addr := address;
+            out_msg.Type := TriggerType:AcksComplete;
+          }
+        }
+        APPEND_TRANSITION_COMMENT(" ic: Acks remaining: ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+        tbe.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(d_writeDataToMemory, "d", desc="Write data to memory") {
+    peek(responseNetwork_in, ResponseMsg) {
+      getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
+      if (tbe.Dirty == false) {
+          // have to update the TBE, too, because of how this
+          // directory deals with functional writes
+        tbe.DataBlk := in_msg.DataBlk;
+      }
+    }
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    peek(requestNetwork_in, CPURequestMsg) {
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        tbe.writeMask.clear();
+        tbe.writeMask.orMask(in_msg.writeMask);
+        tbe.wtData := true;
+        tbe.WTRequestor := in_msg.WTRequestor;
+        tbe.LastSender := in_msg.Requestor;
+      }
+      if (in_msg.Type == CoherenceRequestType:Atomic) {
+        tbe.writeMask.clear();
+        tbe.writeMask.orMask(in_msg.writeMask);
+        tbe.atomicData := true;
+        tbe.WTRequestor := in_msg.WTRequestor;
+        tbe.LastSender := in_msg.Requestor;
+      }
+      tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
+      tbe.Dirty := false;
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        tbe.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
+        tbe.Dirty := true;
+      }
+      tbe.OriginalRequestor := in_msg.Requestor;
+      tbe.NumPendingAcks := 0;
+      tbe.Cached := in_msg.ForceShared;
+      tbe.InitialRequestTime := in_msg.InitialRequestTime;
+    }
+  }
+
+  action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+    if (tbe.Dirty == false) {
+        getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    }
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(wd_writeBackData, "wd", desc="Write back data if needed") {
+    if (tbe.wtData) {
+      getDirectoryEntry(address).DataBlk.copyPartial(tbe.DataBlk, tbe.writeMask);
+    } else if (tbe.atomicData) {
+      tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask);
+      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    } else if (tbe.Dirty == false) {
+      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    }
+  }
+
+  action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") {
+    peek(memQueue_in, MemoryMsg) {
+      if (tbe.wtData == true) {
+          // do nothing
+      } else if (tbe.Dirty == false) {
+        tbe.DataBlk := getDirectoryEntry(address).DataBlk;
+      }
+      tbe.MemData := true;
+    }
+  }
+
+  action(y_writeProbeDataToTBE, "y", desc="write Probe Data to TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (in_msg.Dirty) {
+        if (tbe.wtData) {
+          DataBlock tmp := in_msg.DataBlk;
+          tmp.copyPartial(tbe.DataBlk,tbe.writeMask);
+          tbe.DataBlk := tmp;
+          tbe.writeMask.fillMask();
+        } else if (tbe.Dirty) {
+          if(tbe.atomicData == false && tbe.wtData == false) {
+            DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
+            assert(tbe.DataBlk == in_msg.DataBlk);  // in case of double data
+          }
+        } else {
+          tbe.DataBlk := in_msg.DataBlk;
+          tbe.Dirty := in_msg.Dirty;
+          tbe.LastSender := in_msg.Sender;
+        }
+      }
+      if (in_msg.Hit) {
+        tbe.Cached := true;
+      }
+    }
+  }
+
+  action(mwc_markSinkWriteCancel, "mwc", desc="Mark to sink impending VicDirty") {
+    peek(responseNetwork_in, ResponseMsg) {
+      getDirectoryEntry(address).VicDirtyIgnore.add(in_msg.Sender);
+      APPEND_TRANSITION_COMMENT(" setting bit to sink VicDirty ");
+    }
+  }
+
+  action(x_decrementAcks, "x", desc="decrement Acks pending") {
+    tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+    APPEND_TRANSITION_COMMENT(" Acks remaining: ");
+    APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+  }
+
+  action(o_checkForCompletion, "o", desc="check for ack completion") {
+    if (tbe.NumPendingAcks == 0) {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:AcksComplete;
+      }
+    }
+    APPEND_TRANSITION_COMMENT(" Check: Acks remaining: ");
+    APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+  }
+
+  action(rv_removeVicDirtyIgnore, "rv", desc="Remove ignored core") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor);
+    }
+  }
+
+  action(al_allocateL3Block, "al", desc="allocate the L3 block on WB") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+        entry.DataBlk := in_msg.DataBlk;
+        entry.LastSender := in_msg.Sender;
+      } else {
+        if (L3CacheMemory.cacheAvail(address) == false) {
+          Addr victim := L3CacheMemory.cacheProbe(address);
+          CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+                                                 L3CacheMemory.lookup(victim));
+          queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+                           victim_entry.DataBlk);
+          L3CacheMemory.deallocate(victim);
+        }
+        assert(L3CacheMemory.cacheAvail(address));
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+        entry.DataBlk := in_msg.DataBlk;
+
+        entry.LastSender := in_msg.Sender;
+      }
+    }
+  }
+
+  action(alwt_allocateL3BlockOnWT, "alwt", desc="allocate the L3 block on WT") {
+    if ((tbe.wtData || tbe.atomicData) && useL3OnWT) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+        entry.DataBlk := tbe.DataBlk;
+        entry.LastSender := tbe.LastSender;
+      } else {
+        if (L3CacheMemory.cacheAvail(address) == false) {
+          Addr victim := L3CacheMemory.cacheProbe(address);
+          CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+                                                 L3CacheMemory.lookup(victim));
+          queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+                           victim_entry.DataBlk);
+          L3CacheMemory.deallocate(victim);
+        }
+        assert(L3CacheMemory.cacheAvail(address));
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+        entry.DataBlk := tbe.DataBlk;
+        entry.LastSender := tbe.LastSender;
+      }
+    }
+  }
+
+  action(sf_setForwardReqTime, "sf", desc="...") {
+    tbe.ForwardRequestTime := curCycle();
+  }
+
+  action(dl_deallocateL3, "dl", desc="deallocate the L3 block") {
+    L3CacheMemory.deallocate(address);
+  }
+
+  action(p_popRequestQueue, "p", desc="pop request queue") {
+    requestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="pop response queue") {
+    responseNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pm_popMemQueue, "pm", desc="pop mem queue") {
+    memQueue_in.dequeue(clockEdge());
+  }
+
+  action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(ptl_popTriggerQueue, "ptl", desc="pop L3 trigger queue") {
+    L3TriggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(pu_popUnblockQueue, "pu", desc="pop unblock queue") {
+    unblockNetwork_in.dequeue(clockEdge());
+  }
+
+  action(zz_recycleRequestQueue, "zz", desc="recycle request queue") {
+    requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(yy_recycleResponseQueue, "yy", desc="recycle response queue") {
+    responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") {
+    stall_and_wait(requestNetwork_in, address);
+  }
+
+  action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") {
+    wakeUpBuffers(address);
+  }
+
+  action(wa_wakeUpAllDependents, "waa", desc="Wake up any requests waiting for this region") {
+    wakeUpAllBuffers();
+  }
+
+  action(z_stall, "z", desc="...") {
+  }
+
+  // TRANSITIONS
+  transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
+      st_stallAndWaitRequest;
+  }
+
+  // It may be possible to save multiple invalidations here!
+  transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {Atomic, WriteThrough}) {
+      st_stallAndWaitRequest;
+  }
+
+
+  // transitions from U
+  transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead} {
+    t_allocateTBE;
+    l_queueMemRdReq;
+    sc_probeShrCoreData;
+    p_popRequestQueue;
+  }
+
+  transition(U, WriteThrough, BM_PM) {L3TagArrayRead, L3TagArrayWrite} {
+    t_allocateTBE;
+    w_sendResponseWBAck;
+    l_queueMemRdReq;
+    dc_probeInvCoreData;
+    p_popRequestQueue;
+  }
+
+  transition(U, Atomic, BM_PM) {L3TagArrayRead, L3TagArrayWrite} {
+    t_allocateTBE;
+    l_queueMemRdReq;
+    dc_probeInvCoreData;
+    p_popRequestQueue;
+  }
+
+  transition(U, {RdBlkM}, BM_PM) {L3TagArrayRead} {
+    t_allocateTBE;
+    l_queueMemRdReq;
+    dc_probeInvCoreData;
+    p_popRequestQueue;
+  }
+
+  transition(U, RdBlk, B_PM) {L3TagArrayRead}{
+    t_allocateTBE;
+    l_queueMemRdReq;
+    sc_probeShrCoreData;
+    p_popRequestQueue;
+  }
+
+  transition(U, CtoD, BP) {L3TagArrayRead} {
+    t_allocateTBE;
+    ic_probeInvCore;
+    p_popRequestQueue;
+  }
+
+  transition(U, VicDirty, BL) {L3TagArrayRead} {
+    t_allocateTBE;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(U, VicClean, BL) {L3TagArrayRead} {
+    t_allocateTBE;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(BL, {VicDirty, VicClean}) {
+    zz_recycleRequestQueue;
+  }
+
+  transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} {
+    d_writeDataToMemory;
+    al_allocateL3Block;
+    wa_wakeUpDependents;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  transition(BL, StaleWB, U) {L3TagArrayWrite} {
+    dt_deallocateTBE;
+    wa_wakeUpAllDependents;
+    pr_popResponseQueue;
+  }
+
+  transition({B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
+    z_stall;
+  }
+
+  transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, WBAck) {
+    pm_popMemQueue;
+  }
+
+  transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, StaleVicDirty) {
+    rv_removeVicDirtyIgnore;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition({B}, CoreUnblock, U) {
+    wa_wakeUpDependents;
+    pu_popUnblockQueue;
+  }
+
+  transition(B, UnblockWriteThrough, U) {
+    wa_wakeUpDependents;
+    pt_popTriggerQueue;
+  }
+
+  transition(BS_PM, MemData, BS_Pm) {} {
+    mt_writeMemDataToTBE;
+    pm_popMemQueue;
+  }
+
+  transition(BM_PM, MemData, BM_Pm){} {
+    mt_writeMemDataToTBE;
+    pm_popMemQueue;
+  }
+
+  transition(B_PM, MemData, B_Pm){} {
+    mt_writeMemDataToTBE;
+    pm_popMemQueue;
+  }
+
+  transition(BS_PM, L3Hit, BS_Pm) {} {
+    ptl_popTriggerQueue;
+  }
+
+  transition(BM_PM, L3Hit, BM_Pm) {} {
+    ptl_popTriggerQueue;
+  }
+
+  transition(B_PM, L3Hit, B_Pm) {} {
+    ptl_popTriggerQueue;
+  }
+
+  transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+    mt_writeMemDataToTBE;
+    s_sendResponseS;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pm_popMemQueue;
+  }
+
+  transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+    mt_writeMemDataToTBE;
+    m_sendResponseM;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pm_popMemQueue;
+  }
+
+  transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+    mt_writeMemDataToTBE;
+    es_sendResponseES;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pm_popMemQueue;
+  }
+
+  transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+    s_sendResponseS;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    ptl_popTriggerQueue;
+  }
+
+  transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+    m_sendResponseM;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    ptl_popTriggerQueue;
+  }
+
+  transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+    es_sendResponseES;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    ptl_popTriggerQueue;
+  }
+
+  transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BP}, CPUPrbResp) {
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    o_checkForCompletion;
+    pr_popResponseQueue;
+  }
+
+  transition(BS_PM, ProbeAcksComplete, BS_M) {} {
+    sf_setForwardReqTime;
+    pt_popTriggerQueue;
+  }
+
+  transition(BM_PM, ProbeAcksComplete, BM_M) {} {
+    sf_setForwardReqTime;
+    pt_popTriggerQueue;
+  }
+
+  transition(B_PM, ProbeAcksComplete, B_M){} {
+    sf_setForwardReqTime;
+    pt_popTriggerQueue;
+  }
+
+  transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+    sf_setForwardReqTime;
+    s_sendResponseS;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+    sf_setForwardReqTime;
+    m_sendResponseM;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+    sf_setForwardReqTime;
+    es_sendResponseES;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(BP, ProbeAcksComplete, B){L3TagArrayWrite, L3TagArrayWrite} {
+    sf_setForwardReqTime;
+    c_sendResponseCtoD;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base-msg.sm b/src/mem/protocol/MOESI_AMD_Base-msg.sm
new file mode 100644
index 000000000..ff8842369
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-msg.sm
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+
+enumeration(CoherenceRequestType, desc="Coherence Request Types") {
+  // CPU Request Types ONLY
+  RdBlk,        desc="Read Blk";
+  RdBlkM,       desc="Read Blk Modified";
+  RdBlkS,       desc="Read Blk Shared";
+  CtoD,         desc="Change To Dirty";
+  VicClean,     desc="L2 clean eviction";
+  VicDirty,     desc="L2 dirty eviction";
+  Atomic,       desc="Upper level atomic";
+  AtomicWriteBack, desc="Upper level atomic";
+  WriteThrough, desc="Ordered WriteThrough w/Data";
+  WriteThroughFifo, desc="WriteThrough with no data";
+  WriteThroughDummy, desc="WriteThrough with no data for atomic operation";
+  WriteFlush,   desc="Release Flush";
+
+  WrCancel,     desc="want to cancel WB to Memory"; // should this be here?
+
+  WBApproval,   desc="WB Approval";
+
+  // Messages between Dir and R-Dir
+  ForceInv,     desc="Send invalide to the block";
+  ForceDowngrade, desc="Send downgrade to the block";
+  Unblock,      desc="Used to let the dir know a message has been sunk";
+
+  // Messages between R-Dir and R-Buffer
+  PrivateNotify, desc="Let region buffer know it has private access";
+  SharedNotify,  desc="Let region buffer know it has shared access";
+  WbNotify,      desc="Let region buffer know it saw its wb request";
+  Downgrade,     desc="Force the region buffer to downgrade to shared";
+  // Response to R-Dir (probably should be on a different network, but
+  // I need it to be ordered with respect to requests)
+  InvAck,       desc="Let the R-Dir know when the inv has occured";
+
+  PrivateRequest, desc="R-buf wants the region in private";
+  UpgradeRequest, desc="R-buf wants the region in private";
+  SharedRequest,  desc="R-buf wants the region in shared (could respond with private)";
+  CleanWbRequest, desc="R-buf wants to deallocate clean region";
+
+  NA,             desc="So we don't get segfaults";
+}
+
+enumeration(ProbeRequestType, desc="Probe Request Types") {
+  PrbDowngrade,    desc="Probe for Status";  // EtoS, MtoO, StoS
+  PrbInv,       desc="Probe to Invalidate";
+
+  // For regions
+  PrbRepl,      desc="Force the cache to do a replacement";
+  PrbRegDowngrade, desc="Probe for Status";  // EtoS, MtoO, StoS
+  PrbAtomic,    desc="Forwarded Atomic Operation";
+}
+
+
+enumeration(CoherenceResponseType, desc="Coherence Response Types") {
+  NBSysResp,       desc="Northbridge response to CPU Rd request";
+  NBSysWBAck,      desc="Northbridge response ok to WB";
+  TDSysResp,       desc="TCCdirectory response to CPU Rd request";
+  TDSysWBAck,      desc="TCCdirectory response ok to WB";
+  TDSysWBNack,     desc="TCCdirectory response ok to drop";
+  CPUPrbResp,      desc="CPU Probe Response";
+  CPUData,         desc="CPU Data";
+  StaleNotif,      desc="Notification of Stale WBAck, No data to writeback";
+  CPUCancelWB,     desc="want to cancel WB to Memory";
+  MemData,         desc="Data from Memory";
+
+  // for regions
+  PrivateAck,      desc="Ack that r-buf received private notify";
+  RegionWbAck,     desc="Writeback Ack that r-buf completed deallocation";
+  DirReadyAck,     desc="Directory (mem ctrl)<->region dir handshake";
+}
+
+enumeration(CoherenceState, default="CoherenceState_NA", desc="Coherence State") {
+  Modified,             desc="Modified";
+  Owned,                desc="Owned state";
+  Exclusive,            desc="Exclusive";
+  Shared,               desc="Shared";
+  NA,                   desc="NA";
+}
+
+structure(CPURequestMsg, desc="...", interface="Message") {
+  Addr addr,             desc="Physical address for this request";
+  Addr DemandAddress,       desc="Physical block address for this request";
+  CoherenceRequestType Type,   desc="Type of request";
+  DataBlock DataBlk,           desc="data for the cache line";  // only for WB
+  bool Dirty,                   desc="whether WB data is dirty";  // only for WB
+  MachineID Requestor,            desc="Node who initiated the request";
+  NetDest Destination,             desc="Multicast destination mask";
+  bool Shared,                  desc="For CPU_WrVicBlk, vic is O not M.  For CPU_ClVicBlk, vic is S";
+  MessageSizeType MessageSize, desc="size category of the message";
+  Cycles InitialRequestTime, desc="time the initial requests was sent from the L1Cache";
+  Cycles ForwardRequestTime, desc="time the dir forwarded the request";
+  Cycles ProbeRequestStartTime, desc="the time the dir started the probe request";
+  bool DemandRequest, default="false", desc="For profiling purposes";
+
+  NetDest Sharers,              desc="Caches that may have a valid copy of the data";
+  bool ForceShared,             desc="R-dir knows it is shared, pass on so it sends an S copy, not E";
+  bool Private, default="false", desc="Requestor already has private permissions, no need for dir check";
+  bool CtoDSinked, default="false", desc="This is true if the CtoD previously sent must have been sunk";
+
+  bool NoAckNeeded, default="false", desc="True if region buffer doesn't need to ack";
+  int Acks, default="0", desc="Acks that the dir (mem ctrl) should expect to receive";
+  CoherenceRequestType OriginalType, default="CoherenceRequestType_NA",  desc="Type of request from core fwded through region buffer";
+  WriteMask writeMask, desc="Write Through Data";
+  MachineID WTRequestor,            desc="Node who initiated the write through";
+  HSAScope scope,                      default="HSAScope_SYSTEM", desc="Request Scope";
+  int wfid,                         default="0", desc="wavefront id";
+  bool NoWriteConflict,             default="true", desc="write collided with CAB entry";
+  int ProgramCounter,               desc="PC that accesses to this block";
+
+  bool functionalRead(Packet *pkt) {
+    // Only PUTX messages contains the data block
+    if (Type == CoherenceRequestType:VicDirty) {
+        return testAndRead(addr, DataBlk, pkt);
+    }
+
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return testAndWrite(addr, DataBlk, pkt);
+  }
+}
+
+structure(NBProbeRequestMsg, desc="...", interface="Message") {
+  Addr addr,              desc="Physical address for this request";
+  ProbeRequestType Type,             desc="NB_PrbNxtState signal";
+  bool ReturnData,              desc="Indicates CPU should return data";
+  NetDest Destination,             desc="Node to whom the data is sent";
+  MessageSizeType MessageSize, desc="size category of the message";
+  bool DemandRequest, default="false", desc="demand request, requesting 3-hop transfer";
+  Addr DemandAddress,        desc="Demand block address for a region request";
+  MachineID Requestor,          desc="Requestor id for 3-hop requests";
+  bool NoAckNeeded, default="false", desc="For short circuting acks";
+  int ProgramCounter,           desc="PC that accesses to this block";
+
+  bool functionalRead(Packet *pkt) {
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return false;
+  }
+
+}
+
+structure(TDProbeRequestMsg, desc="...", interface="Message") {
+  Addr addr,              desc="Physical address for this request";
+  ProbeRequestType Type,  desc="TD_PrbNxtState signal";
+  bool ReturnData,        desc="Indicates CPU should return data";
+  bool localCtoD,         desc="Indicates CtoD is within the GPU hierarchy (aka TCC subtree)";
+  NetDest Destination,    desc="Node to whom the data is sent";
+  MessageSizeType MessageSize, desc="size category of the message";
+  int Phase,              desc="Synchronization Phase";
+  int wfid,               desc="wavefront id for Release";
+  MachineID Requestor,    desc="Node who initiated the request";
+
+  bool functionalRead(Packet *pkt) {
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return false;
+  }
+}
+
+// Response Messages seemed to be easily munged into one type
+structure(ResponseMsg, desc="...", interface="Message") {
+  Addr addr,             desc="Physical address for this request";
+  CoherenceResponseType Type,  desc="NB Sys Resp or CPU Response to Probe";
+  MachineID Sender,               desc="Node who sent the data";
+  NetDest Destination,             desc="Node to whom the data is sent";
+  // Begin Used Only By CPU Response
+  DataBlock DataBlk,           desc="data for the cache line";
+  bool Hit,                    desc="probe hit valid line";
+  bool Shared,                 desc="True if S, or if NB Probe ReturnData==1 && O";
+  bool Dirty,                  desc="Is the data dirty (different than memory)?";
+  bool Ntsl,                   desc="indicates probed lin will be invalid after probe";
+  bool UntransferredOwner,     desc="pending confirmation of ownership change";
+  // End Used Only By CPU Response
+
+  // Begin NB Response Only
+  CoherenceState State, default=CoherenceState_NA,        desc="What returned data from NB should be in";
+  bool CtoD,                    desc="was the originator a CtoD?";
+  // End NB Response Only
+
+  // Normally if a block gets hit by a probe while waiting to be written back,
+  // you flip the NbReqShared signal (part of the CPURequest signal group).
+  // But since this is in packets and I don't want to send a separate packet,
+  // let's just send this signal back with the data instead
+  bool NbReqShared,             desc="modification of Shared field from initial request, e.g. hit by shared probe";
+
+  MessageSizeType MessageSize, desc="size category of the message";
+  Cycles InitialRequestTime, desc="time the initial requests was sent from the L1Cache";
+  Cycles ForwardRequestTime, desc="time the dir forwarded the request";
+  Cycles ProbeRequestStartTime, desc="the time the dir started the probe request";
+  bool DemandRequest, default="false", desc="For profiling purposes";
+
+  bool L3Hit, default="false", desc="Did memory or L3 supply the data?";
+  MachineID OriginalResponder, desc="Mach which wrote the data to the L3";
+  MachineID WTRequestor,             desc="Node who started the writethrough";
+
+  bool NotCached, default="false", desc="True when the Region buffer has already evicted the line";
+
+  bool NoAckNeeded, default="false", desc="For short circuting acks";
+  bool isValid, default="false", desc="Is acked block valid";
+  int wfid, default="0", desc="wavefront id";
+  int Phase,                   desc="Synchronization Phase";
+
+  int ProgramCounter,       desc="PC that issues this request";
+  bool mispred,              desc="tell TCP if the block should not be bypassed";
+
+
+  bool functionalRead(Packet *pkt) {
+    // Only PUTX messages contains the data block
+    if (Type == CoherenceResponseType:CPUData ||
+        Type == CoherenceResponseType:MemData) {
+        return testAndRead(addr, DataBlk, pkt);
+    }
+
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return testAndWrite(addr, DataBlk, pkt);
+  }
+}
+
+structure(UnblockMsg, desc="...", interface="Message") {
+  Addr addr,              desc="Physical address for this request";
+  NetDest Destination,          desc="Destination (always directory)";
+  MessageSizeType MessageSize, desc="size category of the message";
+  MachineID Sender,               desc="Node who sent the data";
+  bool currentOwner, default="false", desc="Is the sender the current owner";
+  bool DoneAck, default="false", desc="Is this a done ack?";
+  bool Dirty, default="false", desc="Was block dirty when evicted";
+  bool wasValid, default="false", desc="Was block valid when evicted";
+  bool valid, default="false", desc="Is block valid";
+  bool validToInvalid, default="false", desc="Was block valid when evicted";
+
+  bool functionalRead(Packet *pkt) {
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return false;
+  }
+}
+
+enumeration(TriggerType, desc="Trigger Type") {
+  L2_to_L1,             desc="L2 to L1 fill";
+  AcksComplete,         desc="NB received all needed Acks";
+
+  // For regions
+  InvNext,              desc="Invalidate the next block";
+  PrivateAck,           desc="Loopback ack for machines with no Region Buffer";
+  AllOutstanding,       desc="All outstanding requests have finished";
+  L3Hit,                desc="L3 hit in dir";
+
+  // For region directory once the directory is blocked
+  InvRegion,            desc="Invalidate region";
+  DowngradeRegion,      desc="downgrade region";
+  //For writethrough
+  UnblockWriteThrough,  desc="unblock";
+  WriteData,            desc="Write to full cacheblock data";
+  WriteDone,            desc="Sequencer says that write is done";
+  AtomicDone,           desc="Atomic is done";
+}
+
+enumeration(CacheId, desc="Which Cache in the Core") {
+  L1I,          desc="L1 I-cache";
+  L1D0,         desc="L1 D-cache cluster 0";
+  L1D1,         desc="L1 D-cache cluster 1";
+  NA,           desc="Default";
+}
+
+structure(TriggerMsg, desc="...", interface="Message") {
+  Addr addr,              desc="Address";
+  TriggerType Type,             desc="Type of trigger";
+  CacheId Dest,         default="CacheId_NA", desc="Cache to invalidate";
+  int ProgramCounter,           desc="PC that accesses to this block";
+
+  bool functionalRead(Packet *pkt) {
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return false;
+  }
+
+}
+
+enumeration(FifoType, desc="Fifo Type") {
+  WriteDummy,          desc="Dummy Write for atomic operation";
+  WriteThrough,        desc="simple writethrough request";
+  WriteFlush,          desc="synchronization message";
+}
+
+structure(FifoMsg, desc="...", interface="Message") {
+  Addr addr,          desc="Address";
+  FifoType Type,            desc="WriteThrough/WriteFlush";
+  int wfid,                 default="0",desc="wavefront id";
+  MachineID Requestor,      desc="Flush Requestor";
+  MachineID oRequestor,      desc="original Flush Requestor";
+
+  bool functionalRead(Packet *pkt) {
+    return false;
+  }
+
+  bool functionalWrite(Packet *pkt) {
+    // No check on message type required since the protocol should
+    // read data from those messages that contain the block
+    return false;
+  }
+
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm b/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm
new file mode 100644
index 000000000..f545c2fa7
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm
@@ -0,0 +1,1408 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu, Sooraj Puthoor
+ */
+
+/*
+ * This file is based on MOESI_AMD_Base.sm
+ * Differences with AMD base protocol
+ * -- Uses a probe filter memory to track sharers.
+ * -- The probe filter can be inclusive or non-inclusive
+ * -- Only two sharers tracked. Sharers are a) GPU or/and  b) CPU
+ * -- If sharer information available, the sharer is probed
+ * -- If sharer information not available, probes are broadcasted
+ */
+
+machine(MachineType:Directory, "AMD Baseline protocol")
+: DirectoryMemory * directory;
+  CacheMemory * L3CacheMemory;
+  CacheMemory * ProbeFilterMemory;
+  Cycles response_latency := 5;
+  Cycles l3_hit_latency := 50;
+  bool noTCCdir := "False";
+  bool CAB_TCC := "False";
+  int TCC_select_num_bits:=1;
+  bool useL3OnWT := "False";
+  bool inclusiveDir := "True";
+  Cycles to_memory_controller_latency := 1;
+
+  // From the Cores
+  MessageBuffer * requestFromCores, network="From", virtual_network="0", ordered="false", vnet_type="request";
+  MessageBuffer * responseFromCores, network="From", virtual_network="2", ordered="false", vnet_type="response";
+  MessageBuffer * unblockFromCores, network="From", virtual_network="4", ordered="false", vnet_type="unblock";
+
+  MessageBuffer * probeToCore, network="To", virtual_network="0", ordered="false", vnet_type="request";
+  MessageBuffer * responseToCore, network="To", virtual_network="2", ordered="false", vnet_type="response";
+
+  MessageBuffer * triggerQueue, ordered="true";
+  MessageBuffer * L3triggerQueue, ordered="true";
+  MessageBuffer * responseFromMemory;
+{
+  // STATES
+  state_declaration(State, desc="Directory states", default="Directory_State_U") {
+    U, AccessPermission:Backing_Store,                 desc="unblocked";
+    BL, AccessPermission:Busy,                  desc="got L3 WB request";
+    // BL is Busy because it is busy waiting for the data
+    // which is possibly in the network. The cache which evicted the data
+    // might have moved to some other state after doing the eviction
+    // BS==> Received a read request; has not requested ownership
+    // B==> Received a read request; has requested ownership
+    // BM==> Received a modification request
+    B_P, AccessPermission:Backing_Store,      desc="Back invalidation, waiting for probes";
+    BS_M, AccessPermission:Backing_Store,     desc="blocked waiting for memory";
+    BM_M, AccessPermission:Backing_Store,     desc="blocked waiting for memory";
+    B_M, AccessPermission:Backing_Store,      desc="blocked waiting for memory";
+    BP, AccessPermission:Backing_Store,       desc="blocked waiting for probes, no need for memory";
+    BS_PM, AccessPermission:Backing_Store,    desc="blocked waiting for probes and Memory";
+    BM_PM, AccessPermission:Backing_Store,    desc="blocked waiting for probes and Memory";
+    B_PM, AccessPermission:Backing_Store,     desc="blocked waiting for probes and Memory";
+    BS_Pm, AccessPermission:Backing_Store,    desc="blocked waiting for probes, already got memory";
+    BM_Pm, AccessPermission:Backing_Store,    desc="blocked waiting for probes, already got memory";
+    B_Pm, AccessPermission:Backing_Store,     desc="blocked waiting for probes, already got memory";
+    B, AccessPermission:Backing_Store,        desc="sent response, Blocked til ack";
+  }
+
+  // Events
+  enumeration(Event, desc="Directory events") {
+    // CPU requests
+    RdBlkS,             desc="...";
+    RdBlkM,             desc="...";
+    RdBlk,              desc="...";
+    CtoD,               desc="...";
+    WriteThrough,       desc="WriteThrough Message";
+    Atomic,             desc="Atomic Message";
+
+    // writebacks
+    VicDirty,           desc="...";
+    VicClean,           desc="...";
+    CPUData,            desc="WB data from CPU";
+    StaleWB,         desc="Notification that WB has been superceded by a probe";
+
+    // probe responses
+    CPUPrbResp,            desc="Probe Response Msg";
+
+    ProbeAcksComplete,  desc="Probe Acks Complete";
+
+    L3Hit,              desc="Hit in L3 return data to core";
+
+    // Replacement
+    PF_Repl,            desc="Replace address from probe filter";
+
+    // Memory Controller
+    MemData, desc="Fetched data from memory arrives";
+    WBAck, desc="Writeback Ack from memory arrives";
+
+    CoreUnblock,            desc="Core received data, unblock";
+    UnblockWriteThrough,    desc="Unblock because of writethrough request finishing";
+
+    StaleVicDirty,        desc="Core invalidated before VicDirty processed";
+  }
+
+  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+    L3DataArrayRead,    desc="Read the data array";
+    L3DataArrayWrite,   desc="Write the data array";
+    L3TagArrayRead,     desc="Read the data array";
+    L3TagArrayWrite,    desc="Write the data array";
+
+    PFTagArrayRead,     desc="Read the data array";
+    PFTagArrayWrite,    desc="Write the data array";
+  }
+
+  // TYPES
+
+  enumeration(ProbeFilterState, desc="") {
+    T,  desc="Tracked";
+    NT, desc="Not tracked";
+    B, desc="Blocked, This entry is being replaced";
+  }
+
+  // DirectoryEntry
+  structure(Entry, desc="...", interface="AbstractEntry") {
+    State DirectoryState,          desc="Directory state";
+    DataBlock DataBlk,             desc="data for the block";
+    NetDest VicDirtyIgnore,  desc="VicDirty coming from whom to ignore";
+  }
+
+  structure(CacheEntry, desc="...", interface="AbstractCacheEntry") {
+    DataBlock DataBlk,          desc="data for the block";
+    MachineID LastSender,       desc="Mach which this block came from";
+    ProbeFilterState pfState,   desc="ProbeFilter state",default="Directory_ProbeFilterState_NT";
+    bool isOnCPU,               desc="Block valid in the CPU complex",default="false";
+    bool isOnGPU,               desc="Block valid in the GPU complex",default="false";
+  }
+
+  structure(TBE, desc="...") {
+    State TBEState,     desc="Transient state";
+    DataBlock DataBlk,  desc="data for the block";
+    bool Dirty,         desc="Is the data dirty?";
+    int NumPendingAcks,        desc="num acks expected";
+    MachineID OriginalRequestor,        desc="Original Requestor";
+    MachineID WTRequestor,        desc="WT Requestor";
+    bool Cached,        desc="data hit in Cache";
+    bool MemData,       desc="Got MemData?",default="false";
+    bool wtData,       desc="Got write through data?",default="false";
+    bool atomicData,   desc="Got Atomic op?",default="false";
+    Cycles InitialRequestTime, desc="...";
+    Cycles ForwardRequestTime, desc="...";
+    Cycles ProbeRequestStartTime, desc="...";
+    MachineID LastSender, desc="Mach which this block came from";
+    bool L3Hit, default="false", desc="Was this an L3 hit?";
+    uint64_t probe_id,        desc="probe id for lifetime profiling";
+    WriteMask writeMask,    desc="outstanding write through mask";
+    Addr demandAddress,  desc="Address of demand request which caused probe filter eviction";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs";
+
+  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
+
+  Tick clockEdge();
+  Tick cyclesToTicks(Cycles c);
+
+  void set_tbe(TBE a);
+  void unset_tbe();
+  void wakeUpAllBuffers();
+  void wakeUpBuffers(Addr a);
+  Cycles curCycle();
+
+  Entry getDirectoryEntry(Addr addr), return_by_pointer="yes" {
+    Entry dir_entry := static_cast(Entry, "pointer", directory.lookup(addr));
+
+    if (is_valid(dir_entry)) {
+      //DPRINTF(RubySlicc, "Getting entry %s: %s\n", addr, dir_entry.DataBlk);
+      return dir_entry;
+    }
+
+    dir_entry :=  static_cast(Entry, "pointer",
+                              directory.allocate(addr, new Entry));
+    return dir_entry;
+  }
+
+  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
+    TBE tbe := TBEs.lookup(addr);
+    if (is_valid(tbe) && tbe.MemData) {
+      DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe);
+      return tbe.DataBlk;
+    }
+    DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr));
+    return getDirectoryEntry(addr).DataBlk;
+  }
+
+  State getState(TBE tbe, CacheEntry entry, Addr addr) {
+    CacheEntry probeFilterEntry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(addr));
+    if (inclusiveDir) {
+      if (is_valid(probeFilterEntry) && probeFilterEntry.pfState == ProbeFilterState:B) {
+        return State:B_P;
+      }
+    }
+    return getDirectoryEntry(addr).DirectoryState;
+  }
+
+  void setState(TBE tbe, CacheEntry entry, Addr addr, State state) {
+    getDirectoryEntry(addr).DirectoryState := state;
+  }
+
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    } else {
+      functionalMemoryRead(pkt);
+    }
+  }
+
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int num_functional_writes := 0;
+
+    TBE tbe := TBEs.lookup(addr);
+    if(is_valid(tbe)) {
+      num_functional_writes := num_functional_writes +
+            testAndWrite(addr, tbe.DataBlk, pkt);
+    }
+
+    num_functional_writes := num_functional_writes +
+        functionalMemoryWrite(pkt);
+    return num_functional_writes;
+  }
+
+  AccessPermission getAccessPermission(Addr addr) {
+    // For this Directory, all permissions are just tracked in Directory, since
+    // it's not possible to have something in TBE but not Dir, just keep track
+    // of state all in one place.
+    if (directory.isPresent(addr)) {
+      return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState);
+    }
+
+    return AccessPermission:NotPresent;
+  }
+
+  void setAccessPermission(CacheEntry entry, Addr addr, State state) {
+    getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state));
+  }
+
+  void recordRequestType(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L3DataArrayRead) {
+      L3CacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr);
+    } else if (request_type == RequestType:L3DataArrayWrite) {
+      L3CacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
+    } else if (request_type == RequestType:L3TagArrayRead) {
+      L3CacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:L3TagArrayWrite) {
+      L3CacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:PFTagArrayRead) {
+      ProbeFilterMemory.recordRequestType(CacheRequestType:TagArrayRead, addr);
+    } else if (request_type == RequestType:PFTagArrayWrite) {
+      ProbeFilterMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    }
+  }
+
+  bool checkResourceAvailable(RequestType request_type, Addr addr) {
+    if (request_type == RequestType:L3DataArrayRead) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L3DataArrayWrite) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr);
+    } else if (request_type == RequestType:L3TagArrayRead) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:L3TagArrayWrite) {
+      return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:PFTagArrayRead) {
+      return ProbeFilterMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:PFTagArrayWrite) {
+      return ProbeFilterMemory.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else {
+      error("Invalid RequestType type in checkResourceAvailable");
+      return true;
+    }
+  }
+
+  bool isNotPresentProbeFilter(Addr address) {
+    if (ProbeFilterMemory.isTagPresent(address) ||
+        ProbeFilterMemory.cacheAvail(address)) {
+        return false;
+    }
+    return true;
+  }
+
+  bool isGPUSharer(Addr address) {
+    assert(ProbeFilterMemory.isTagPresent(address));
+    CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address));
+    if (entry.pfState == ProbeFilterState:NT) {
+       return true;
+    } else if (entry.isOnGPU){
+       return true;
+    }
+    return false;
+  }
+
+  bool isCPUSharer(Addr address) {
+    assert(ProbeFilterMemory.isTagPresent(address));
+    CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address));
+    if (entry.pfState == ProbeFilterState:NT) {
+       return true;
+    } else if (entry.isOnCPU){
+       return true;
+    }
+    return false;
+  }
+
+
+  // ** OUT_PORTS **
+  out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore);
+  out_port(responseNetwork_out, ResponseMsg, responseToCore);
+
+  out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+  out_port(L3TriggerQueue_out, TriggerMsg, L3triggerQueue);
+
+  // ** IN_PORTS **
+
+  // Trigger Queue
+  in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=5) {
+    if (triggerQueue_in.isReady(clockEdge())) {
+      peek(triggerQueue_in, TriggerMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == TriggerType:AcksComplete) {
+          trigger(Event:ProbeAcksComplete, in_msg.addr, entry, tbe);
+        }else if (in_msg.Type == TriggerType:UnblockWriteThrough) {
+          trigger(Event:UnblockWriteThrough, in_msg.addr, entry, tbe);
+        } else {
+          error("Unknown trigger msg");
+        }
+      }
+    }
+  }
+
+  in_port(L3TriggerQueue_in, TriggerMsg, L3triggerQueue, rank=4) {
+    if (L3TriggerQueue_in.isReady(clockEdge())) {
+      peek(L3TriggerQueue_in, TriggerMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == TriggerType:L3Hit) {
+          trigger(Event:L3Hit, in_msg.addr, entry, tbe);
+        } else {
+          error("Unknown trigger msg");
+        }
+      }
+    }
+  }
+
+  // Unblock Network
+  in_port(unblockNetwork_in, UnblockMsg, unblockFromCores, rank=3) {
+    if (unblockNetwork_in.isReady(clockEdge())) {
+      peek(unblockNetwork_in, UnblockMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        trigger(Event:CoreUnblock, in_msg.addr, entry, tbe);
+      }
+    }
+  }
+
+  // Core response network
+  in_port(responseNetwork_in, ResponseMsg, responseFromCores, rank=2) {
+    if (responseNetwork_in.isReady(clockEdge())) {
+      peek(responseNetwork_in, ResponseMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
+          trigger(Event:CPUPrbResp, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceResponseType:CPUData) {
+          trigger(Event:CPUData, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
+            trigger(Event:StaleWB, in_msg.addr, entry, tbe);
+        } else {
+          error("Unexpected response type");
+        }
+      }
+    }
+  }
+
+  // off-chip memory request/response is done
+  in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=1) {
+    if (memQueue_in.isReady(clockEdge())) {
+      peek(memQueue_in, MemoryMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
+          trigger(Event:MemData, in_msg.addr, entry, tbe);
+          DPRINTF(RubySlicc, "%s\n", in_msg);
+        } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
+          trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them.
+        } else {
+          DPRINTF(RubySlicc, "%s\n", in_msg.Type);
+          error("Invalid message");
+        }
+      }
+    }
+  }
+
+  in_port(requestNetwork_in, CPURequestMsg, requestFromCores, rank=0) {
+    if (requestNetwork_in.isReady(clockEdge())) {
+      peek(requestNetwork_in, CPURequestMsg) {
+        TBE tbe := TBEs.lookup(in_msg.addr);
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr));
+        if (inclusiveDir && isNotPresentProbeFilter(in_msg.addr)) {
+            Addr victim := ProbeFilterMemory.cacheProbe(in_msg.addr);
+            tbe := TBEs.lookup(victim);
+            entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(victim));
+            trigger(Event:PF_Repl, victim, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
+          trigger(Event:RdBlk, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
+          trigger(Event:RdBlkS, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+          trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+          trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+          trigger(Event:Atomic, in_msg.addr, entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
+          if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+            DPRINTF(RubySlicc, "Dropping VicDirty for address %s\n", in_msg.addr);
+            trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+          } else {
+            DPRINTF(RubySlicc, "Got VicDirty from %s on %s\n", in_msg.Requestor, in_msg.addr);
+            trigger(Event:VicDirty, in_msg.addr, entry, tbe);
+          }
+        } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+          if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
+            DPRINTF(RubySlicc, "Dropping VicClean for address %s\n", in_msg.addr);
+            trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe);
+          } else {
+            DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr);
+            trigger(Event:VicClean, in_msg.addr, entry, tbe);
+          }
+        } else {
+          error("Bad request message type");
+        }
+      }
+    }
+  }
+
+  // Actions
+  action(s_sendResponseS, "s", desc="send Shared response") {
+    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:NBSysResp;
+      if (tbe.L3Hit) {
+        out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+      } else {
+        out_msg.Sender := machineID;
+      }
+      out_msg.Destination.add(tbe.OriginalRequestor);
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := false;
+      out_msg.State := CoherenceState:Shared;
+      out_msg.InitialRequestTime := tbe.InitialRequestTime;
+      out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+      out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+      out_msg.OriginalResponder := tbe.LastSender;
+      out_msg.L3Hit := tbe.L3Hit;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  action(es_sendResponseES, "es", desc="send Exclusive or Shared response") {
+    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := CoherenceResponseType:NBSysResp;
+      if (tbe.L3Hit) {
+        out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+      } else {
+        out_msg.Sender := machineID;
+      }
+      out_msg.Destination.add(tbe.OriginalRequestor);
+      out_msg.DataBlk := tbe.DataBlk;
+      out_msg.MessageSize := MessageSizeType:Response_Data;
+      out_msg.Dirty := tbe.Dirty;
+      if (tbe.Cached) {
+        out_msg.State := CoherenceState:Shared;
+      } else {
+        out_msg.State := CoherenceState:Exclusive;
+      }
+      out_msg.InitialRequestTime := tbe.InitialRequestTime;
+      out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+      out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+      out_msg.OriginalResponder := tbe.LastSender;
+      out_msg.L3Hit := tbe.L3Hit;
+      DPRINTF(RubySlicc, "%s\n", out_msg);
+    }
+  }
+
+  // write-through and atomics do not send an unblock ack back to the
+  // directory. Hence, directory has to generate a self unblocking
+  // message. Additionally, write through's does not require data
+  // in its response. Hence, write through is treated seperately from
+  // write-back and atomics
+  action(m_sendResponseM, "m", desc="send Modified response") {
+    if (tbe.wtData) {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:UnblockWriteThrough;
+      }
+    }else{
+      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysResp;
+        if (tbe.L3Hit) {
+          out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
+        } else {
+          out_msg.Sender := machineID;
+        }
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.Dirty := tbe.Dirty;
+        out_msg.State := CoherenceState:Modified;
+        out_msg.CtoD := false;
+        out_msg.InitialRequestTime := tbe.InitialRequestTime;
+        out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
+        out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+        out_msg.OriginalResponder := tbe.LastSender;
+        if(tbe.atomicData){
+          out_msg.WTRequestor := tbe.WTRequestor;
+        }
+        out_msg.L3Hit := tbe.L3Hit;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+      if (tbe.atomicData) {
+        enqueue(triggerQueue_out, TriggerMsg, 1) {
+          out_msg.addr := address;
+          out_msg.Type := TriggerType:UnblockWriteThrough;
+        }
+      }
+    }
+  }
+
+  action(c_sendResponseCtoD, "c", desc="send CtoD Ack") {
+      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysResp;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+        out_msg.Dirty := false;
+        out_msg.State := CoherenceState:Modified;
+        out_msg.CtoD := true;
+        out_msg.InitialRequestTime := tbe.InitialRequestTime;
+        out_msg.ForwardRequestTime := curCycle();
+        out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+      }
+  }
+
+  action(w_sendResponseWBAck, "w", desc="send WB Ack") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysWBAck;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.WTRequestor := in_msg.WTRequestor;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ForwardRequestTime := curCycle();
+        out_msg.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(l_queueMemWBReq, "lq", desc="Write WB data to memory") {
+    peek(responseNetwork_in, ResponseMsg) {
+      queueMemoryWrite(machineID, address, to_memory_controller_latency,
+                       in_msg.DataBlk);
+    }
+  }
+
+  action(l_queueMemRdReq, "lr", desc="Read data from memory") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) {
+          out_msg.addr := address;
+          out_msg.Type := TriggerType:L3Hit;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        tbe.DataBlk := entry.DataBlk;
+        tbe.LastSender := entry.LastSender;
+        tbe.L3Hit := true;
+        tbe.MemData := true;
+        L3CacheMemory.deallocate(address);
+      } else {
+        queueMemoryRead(machineID, address, to_memory_controller_latency);
+      }
+    }
+  }
+
+  action(dc_probeInvCoreData, "dc", desc="probe inv cores, return data") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbInv;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        if(isCPUSharer(address)) {
+          out_msg.Destination.broadcast(MachineType:CorePair);  // won't be realistic for multisocket
+        }
+
+        // add relevant TCC node to list. This replaces all TCPs and SQCs
+        if(isGPUSharer(address)) {
+          if ((in_msg.Type == CoherenceRequestType:WriteThrough ||
+               in_msg.Type == CoherenceRequestType:Atomic) &&
+               in_msg.NoWriteConflict) {
+          // Don't Include TCCs unless there was write-CAB conflict in the TCC
+          } else if(noTCCdir) {
+            out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                    TCC_select_low_bit, TCC_select_num_bits));
+          } else {
+	        out_msg.Destination.add(map_Address_to_TCCdir(address));
+          }
+        }
+        out_msg.Destination.remove(in_msg.Requestor);
+        tbe.NumPendingAcks := out_msg.Destination.count();
+        if (tbe.NumPendingAcks == 0) {
+          enqueue(triggerQueue_out, TriggerMsg, 1) {
+            out_msg.addr := address;
+            out_msg.Type := TriggerType:AcksComplete;
+          }
+        }
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+        APPEND_TRANSITION_COMMENT(" dc: Acks remaining: ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+        tbe.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(bp_backProbe, "bp", desc="back probe") {
+    enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+      out_msg.addr := address;
+      out_msg.Type := ProbeRequestType:PrbInv;
+      out_msg.ReturnData := true;
+      out_msg.MessageSize := MessageSizeType:Control;
+      if(isCPUSharer(address)) {
+        // won't be realistic for multisocket
+        out_msg.Destination.broadcast(MachineType:CorePair);
+      }
+      // add relevant TCC node to the list. This replaces all TCPs and SQCs
+      if(isGPUSharer(address)) {
+        if (noTCCdir) {
+          //Don't need to notify TCC about reads
+        } else {
+          out_msg.Destination.add(map_Address_to_TCCdir(address));
+          tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+        }
+        if (noTCCdir && CAB_TCC) {
+          out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                  TCC_select_low_bit, TCC_select_num_bits));
+        }
+      }
+      tbe.NumPendingAcks := out_msg.Destination.count();
+      if (tbe.NumPendingAcks == 0) {
+        enqueue(triggerQueue_out, TriggerMsg, 1) {
+          out_msg.addr := address;
+          out_msg.Type := TriggerType:AcksComplete;
+        }
+      }
+      DPRINTF(RubySlicc, "%s\n", (out_msg));
+      APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
+      APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+      APPEND_TRANSITION_COMMENT(" - back probe");
+      tbe.ProbeRequestStartTime := curCycle();
+    }
+  }
+
+  action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") {
+    peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+      enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbDowngrade;
+        out_msg.ReturnData := true;
+        out_msg.MessageSize := MessageSizeType:Control;
+        if(isCPUSharer(address)) {
+          out_msg.Destination.broadcast(MachineType:CorePair);  // won't be realistic for multisocket
+        }
+        // add relevant TCC node to the list. This replaces all TCPs and SQCs
+        if(isGPUSharer(address)) {
+          if (noTCCdir) {
+            //Don't need to notify TCC about reads
+          } else {
+	    out_msg.Destination.add(map_Address_to_TCCdir(address));
+            tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
+          }
+          if (noTCCdir && CAB_TCC) {
+            out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                    TCC_select_low_bit, TCC_select_num_bits));
+          }
+        }
+        out_msg.Destination.remove(in_msg.Requestor);
+        tbe.NumPendingAcks := out_msg.Destination.count();
+        if (tbe.NumPendingAcks == 0) {
+          enqueue(triggerQueue_out, TriggerMsg, 1) {
+            out_msg.addr := address;
+            out_msg.Type := TriggerType:AcksComplete;
+          }
+        }
+        DPRINTF(RubySlicc, "%s\n", (out_msg));
+        APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+        tbe.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(ic_probeInvCore, "ic", desc="probe invalidate core, no return data needed") {
+    peek(requestNetwork_in, CPURequestMsg) { // not the right network?
+      enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := ProbeRequestType:PrbInv;
+        out_msg.ReturnData := false;
+        out_msg.MessageSize := MessageSizeType:Control;
+        if(isCPUSharer(address)) {
+          out_msg.Destination.broadcast(MachineType:CorePair);  // won't be realistic for multisocket
+        }
+
+        // add relevant TCC node to the list. This replaces all TCPs and SQCs
+        if(isGPUSharer(address)) {
+          if (noTCCdir) {
+              out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                                TCC_select_low_bit, TCC_select_num_bits));
+          } else {
+	          out_msg.Destination.add(map_Address_to_TCCdir(address));
+          }
+        }
+        out_msg.Destination.remove(in_msg.Requestor);
+        tbe.NumPendingAcks := out_msg.Destination.count();
+        if (tbe.NumPendingAcks == 0) {
+          enqueue(triggerQueue_out, TriggerMsg, 1) {
+            out_msg.addr := address;
+            out_msg.Type := TriggerType:AcksComplete;
+          }
+        }
+        APPEND_TRANSITION_COMMENT(" ic: Acks remaining: ");
+        APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+        DPRINTF(RubySlicc, "%s\n", out_msg);
+        tbe.ProbeRequestStartTime := curCycle();
+      }
+    }
+  }
+
+  action(sm_setMRU, "sm", desc="set probe filter entry as MRU") {
+    ProbeFilterMemory.setMRU(address);
+  }
+
+  action(d_writeDataToMemory, "d", desc="Write data to memory") {
+    peek(responseNetwork_in, ResponseMsg) {
+      getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
+      DPRINTF(RubySlicc, "Writing Data: %s to address %s\n", in_msg.DataBlk,
+              in_msg.addr);
+    }
+  }
+
+  action(te_allocateTBEForEviction, "te", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+      tbe.writeMask.clear();
+      tbe.wtData := false;
+      tbe.atomicData := false;
+      tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
+      tbe.Dirty := false;
+      tbe.NumPendingAcks := 0;
+  }
+
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    peek(requestNetwork_in, CPURequestMsg) {
+      TBEs.allocate(address);
+      set_tbe(TBEs.lookup(address));
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        tbe.writeMask.clear();
+        tbe.writeMask.orMask(in_msg.writeMask);
+        tbe.wtData := true;
+        tbe.WTRequestor := in_msg.WTRequestor;
+        tbe.LastSender := in_msg.Requestor;
+      }
+      if (in_msg.Type == CoherenceRequestType:Atomic) {
+        tbe.writeMask.clear();
+        tbe.writeMask.orMask(in_msg.writeMask);
+        tbe.atomicData := true;
+        tbe.WTRequestor := in_msg.WTRequestor;
+        tbe.LastSender := in_msg.Requestor;
+      }
+      tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
+      tbe.Dirty := false;
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        tbe.DataBlk.copyPartial(in_msg.DataBlk,tbe.writeMask);
+        tbe.Dirty := false;
+      }
+      tbe.OriginalRequestor := in_msg.Requestor;
+      tbe.NumPendingAcks := 0;
+      tbe.Cached := in_msg.ForceShared;
+      tbe.InitialRequestTime := in_msg.InitialRequestTime;
+    }
+  }
+
+  action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
+    if (tbe.Dirty == false) {
+        getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    }
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(wd_writeBackData, "wd", desc="Write back data if needed") {
+    if (tbe.wtData) {
+      DataBlock tmp := getDirectoryEntry(address).DataBlk;
+      tmp.copyPartial(tbe.DataBlk,tbe.writeMask);
+      tbe.DataBlk := tmp;
+      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    } else if (tbe.atomicData) {
+      tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,
+                                tbe.writeMask);
+      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    } else if (tbe.Dirty == false) {
+      getDirectoryEntry(address).DataBlk := tbe.DataBlk;
+    }
+  }
+
+  action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") {
+    peek(memQueue_in, MemoryMsg) {
+      if (tbe.wtData == true) {
+        // DO Nothing (already have the directory data)
+      } else if (tbe.Dirty == false) {
+        tbe.DataBlk := getDirectoryEntry(address).DataBlk;
+      }
+      tbe.MemData := true;
+    }
+  }
+
+  action(y_writeProbeDataToTBE, "y", desc="write Probe Data to TBE") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (in_msg.Dirty) {
+        DPRINTF(RubySlicc, "Got dirty data for %s from %s\n", address, in_msg.Sender);
+        DPRINTF(RubySlicc, "Data is %s\n", in_msg.DataBlk);
+        if (tbe.wtData) {
+          DataBlock tmp := in_msg.DataBlk;
+          tmp.copyPartial(tbe.DataBlk,tbe.writeMask);
+          tbe.DataBlk := tmp;
+        } else if (tbe.Dirty) {
+          if(tbe.atomicData == false && tbe.wtData == false) {
+            DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
+            assert(tbe.DataBlk == in_msg.DataBlk);  // in case of double data
+          }
+        } else {
+          tbe.DataBlk := in_msg.DataBlk;
+          tbe.Dirty := in_msg.Dirty;
+          tbe.LastSender := in_msg.Sender;
+        }
+      }
+      if (in_msg.Hit) {
+        tbe.Cached := true;
+      }
+    }
+  }
+
+  action(mwc_markSinkWriteCancel, "mwc", desc="Mark to sink impending VicDirty") {
+    peek(responseNetwork_in, ResponseMsg) {
+      DPRINTF(RubySlicc, "Write cancel bit set on address %s\n", address);
+      getDirectoryEntry(address).VicDirtyIgnore.add(in_msg.Sender);
+      APPEND_TRANSITION_COMMENT(" setting bit to sink VicDirty ");
+    }
+  }
+
+  action(x_decrementAcks, "x", desc="decrement Acks pending") {
+    tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
+    APPEND_TRANSITION_COMMENT(" Acks remaining: ");
+    APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+  }
+
+  action(o_checkForCompletion, "o", desc="check for ack completion") {
+    if (tbe.NumPendingAcks == 0) {
+      enqueue(triggerQueue_out, TriggerMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := TriggerType:AcksComplete;
+      }
+    }
+    APPEND_TRANSITION_COMMENT(" Check: Acks remaining: ");
+    APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
+  }
+
+  action(rv_removeVicDirtyIgnore, "rv", desc="Remove ignored core") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor);
+    }
+  }
+
+  action(al_allocateL3Block, "al", desc="allocate the L3 block on WB") {
+    peek(responseNetwork_in, ResponseMsg) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+        entry.DataBlk := in_msg.DataBlk;
+        entry.LastSender := in_msg.Sender;
+      } else {
+        if (L3CacheMemory.cacheAvail(address) == false) {
+          Addr victim := L3CacheMemory.cacheProbe(address);
+          CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+                                                 L3CacheMemory.lookup(victim));
+          queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+                           victim_entry.DataBlk);
+          L3CacheMemory.deallocate(victim);
+        }
+        assert(L3CacheMemory.cacheAvail(address));
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+        entry.DataBlk := in_msg.DataBlk;
+
+        entry.LastSender := in_msg.Sender;
+      }
+    }
+  }
+
+  action(alwt_allocateL3BlockOnWT, "alwt", desc="allocate the L3 block on WT") {
+    if ((tbe.wtData || tbe.atomicData) && useL3OnWT) {
+      if (L3CacheMemory.isTagPresent(address)) {
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) ");
+        entry.DataBlk := tbe.DataBlk;
+        entry.LastSender := tbe.LastSender;
+      } else {
+        if (L3CacheMemory.cacheAvail(address) == false) {
+          Addr victim := L3CacheMemory.cacheProbe(address);
+          CacheEntry victim_entry := static_cast(CacheEntry, "pointer",
+                                                 L3CacheMemory.lookup(victim));
+          queueMemoryWrite(machineID, victim, to_memory_controller_latency,
+                           victim_entry.DataBlk);
+          L3CacheMemory.deallocate(victim);
+        }
+        assert(L3CacheMemory.cacheAvail(address));
+        CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry));
+        APPEND_TRANSITION_COMMENT(" al wrote data to L3 ");
+        entry.DataBlk := tbe.DataBlk;
+        entry.LastSender := tbe.LastSender;
+      }
+    }
+  }
+
+  action(apf_allocateProbeFilterEntry, "apf", desc="Allocate probe filte entry") {
+    if (!ProbeFilterMemory.isTagPresent(address)) {
+        if (inclusiveDir) {
+            assert(ProbeFilterMemory.cacheAvail(address));
+        } else if (ProbeFilterMemory.cacheAvail(address) == false) {
+          Addr victim := ProbeFilterMemory.cacheProbe(address);
+          ProbeFilterMemory.deallocate(victim);
+        }
+        assert(ProbeFilterMemory.cacheAvail(address));
+        CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.allocate(address, new CacheEntry));
+        APPEND_TRANSITION_COMMENT(" allocating a new probe filter entry");
+        entry.pfState := ProbeFilterState:NT;
+        if (inclusiveDir) {
+          entry.pfState := ProbeFilterState:T;
+        }
+        entry.isOnCPU := false;
+        entry.isOnGPU := false;
+    }
+  }
+
+  action(mpfe_markPFEntryForEviction, "mpfe", desc="Mark this PF entry is being evicted") {
+    assert(ProbeFilterMemory.isTagPresent(address));
+    CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address));
+    entry.pfState := ProbeFilterState:B;
+    peek(requestNetwork_in, CPURequestMsg) {
+      tbe.demandAddress := in_msg.addr;
+    }
+  }
+
+  action(we_wakeUpEvictionDependents, "we", desc="Wake up requests waiting for demand address and victim address") {
+    wakeUpBuffers(address);
+    wakeUpBuffers(tbe.demandAddress);
+  }
+
+  action(dpf_deallocateProbeFilter, "dpf", desc="deallocate PF entry") {
+    assert(ProbeFilterMemory.isTagPresent(address));
+    ProbeFilterMemory.deallocate(address);
+  }
+
+  action(upf_updateProbeFilter, "upf", desc="") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      assert(ProbeFilterMemory.isTagPresent(address));
+      CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address));
+      if (in_msg.Type == CoherenceRequestType:WriteThrough) {
+        entry.pfState := ProbeFilterState:T;
+        entry.isOnCPU := false;
+        entry.isOnGPU := false;
+      } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+        entry.pfState := ProbeFilterState:T;
+        entry.isOnCPU := false;
+        entry.isOnGPU := false;
+      } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
+        entry.pfState := ProbeFilterState:T;
+        entry.isOnCPU := false;
+        entry.isOnGPU := false;
+      } else if (in_msg.Type == CoherenceRequestType:CtoD) {
+        entry.pfState := ProbeFilterState:T;
+        entry.isOnCPU := false;
+        entry.isOnGPU := false;
+      }
+      if(machineIDToMachineType(in_msg.Requestor) == MachineType:CorePair) {
+        entry.isOnCPU := true;
+      } else {
+        entry.isOnGPU := true;
+      }
+    }
+  }
+
+  action(rmcd_removeSharerConditional, "rmcd", desc="remove sharer from probe Filter, conditional") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      if (ProbeFilterMemory.isTagPresent(address)) {
+        CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address));
+        if(machineIDToMachineType(in_msg.Requestor) == MachineType:CorePair) {//CorePair has inclusive L2
+          if (in_msg.Type == CoherenceRequestType:VicDirty) {
+            entry.isOnCPU := false;
+          } else if (in_msg.Type == CoherenceRequestType:VicClean) {
+            entry.isOnCPU := false;
+          }
+        }
+      }
+    }
+  }
+
+  action(sf_setForwardReqTime, "sf", desc="...") {
+    tbe.ForwardRequestTime := curCycle();
+  }
+
+  action(dl_deallocateL3, "dl", desc="deallocate the L3 block") {
+    L3CacheMemory.deallocate(address);
+  }
+
+  action(p_popRequestQueue, "p", desc="pop request queue") {
+    requestNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pr_popResponseQueue, "pr", desc="pop response queue") {
+    responseNetwork_in.dequeue(clockEdge());
+  }
+
+  action(pm_popMemQueue, "pm", desc="pop mem queue") {
+    memQueue_in.dequeue(clockEdge());
+  }
+
+  action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
+    triggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(ptl_popTriggerQueue, "ptl", desc="pop L3 trigger queue") {
+    L3TriggerQueue_in.dequeue(clockEdge());
+  }
+
+  action(pu_popUnblockQueue, "pu", desc="pop unblock queue") {
+    unblockNetwork_in.dequeue(clockEdge());
+  }
+
+  action(zz_recycleRequestQueue, "zz", desc="recycle request queue") {
+    requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(yy_recycleResponseQueue, "yy", desc="recycle response queue") {
+    responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
+  action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") {
+    stall_and_wait(requestNetwork_in, address);
+  }
+
+  action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") {
+    wakeUpBuffers(address);
+  }
+
+  action(wa_wakeUpAllDependents, "waa", desc="Wake up any requests waiting for this region") {
+    wakeUpAllBuffers();
+  }
+
+  action(z_stall, "z", desc="...") {
+  }
+
+  // TRANSITIONS
+  transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
+    st_stallAndWaitRequest;
+  }
+
+  // It may be possible to save multiple invalidations here!
+  transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, {Atomic, WriteThrough}) {
+    st_stallAndWaitRequest;
+  }
+
+
+  // transitions from U
+  transition(U, PF_Repl, B_P) {PFTagArrayRead, PFTagArrayWrite}{
+    te_allocateTBEForEviction;
+    apf_allocateProbeFilterEntry;
+    bp_backProbe;
+    sm_setMRU;
+    mpfe_markPFEntryForEviction;
+  }
+
+  transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} {
+    t_allocateTBE;
+    apf_allocateProbeFilterEntry;
+    l_queueMemRdReq;
+    sc_probeShrCoreData;
+    sm_setMRU;
+    upf_updateProbeFilter;
+    p_popRequestQueue;
+  }
+
+  transition(U, WriteThrough, BM_PM) {L3TagArrayRead, L3TagArrayWrite, PFTagArrayRead, PFTagArrayWrite} {
+    t_allocateTBE;
+    apf_allocateProbeFilterEntry;
+    w_sendResponseWBAck;
+    l_queueMemRdReq;
+    dc_probeInvCoreData;
+    sm_setMRU;
+    upf_updateProbeFilter;
+    p_popRequestQueue;
+  }
+
+  transition(U, Atomic, BM_PM) {L3TagArrayRead, L3TagArrayWrite, PFTagArrayRead, PFTagArrayWrite} {
+    t_allocateTBE;
+    apf_allocateProbeFilterEntry;
+    l_queueMemRdReq;
+    dc_probeInvCoreData;
+    sm_setMRU;
+    upf_updateProbeFilter;
+    p_popRequestQueue;
+  }
+
+  transition(U, {RdBlkM}, BM_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} {
+    t_allocateTBE;
+    apf_allocateProbeFilterEntry;
+    l_queueMemRdReq;
+    dc_probeInvCoreData;
+    sm_setMRU;
+    upf_updateProbeFilter;
+    p_popRequestQueue;
+  }
+
+  transition(U, RdBlk, B_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite}{
+    t_allocateTBE;
+    apf_allocateProbeFilterEntry;
+    l_queueMemRdReq;
+    sc_probeShrCoreData;
+    sm_setMRU;
+    upf_updateProbeFilter;
+    p_popRequestQueue;
+  }
+
+  transition(U, CtoD, BP) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} {
+    t_allocateTBE;
+    apf_allocateProbeFilterEntry;
+    ic_probeInvCore;
+    sm_setMRU;
+    upf_updateProbeFilter;
+    p_popRequestQueue;
+  }
+
+  transition(U, VicDirty, BL) {L3TagArrayRead} {
+    t_allocateTBE;
+    w_sendResponseWBAck;
+    rmcd_removeSharerConditional;
+    p_popRequestQueue;
+  }
+
+  transition(U, VicClean, BL) {L3TagArrayRead} {
+    t_allocateTBE;
+    w_sendResponseWBAck;
+    rmcd_removeSharerConditional;
+    p_popRequestQueue;
+  }
+
+  transition(BL, {VicDirty, VicClean}) {
+    zz_recycleRequestQueue;
+  }
+
+  transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} {
+    d_writeDataToMemory;
+    al_allocateL3Block;
+    wa_wakeUpDependents;
+    dt_deallocateTBE;
+    //l_queueMemWBReq;  // why need an ack?  esp. with DRAMSim, just put it in queue no ack needed
+    pr_popResponseQueue;
+  }
+
+  transition(BL, StaleWB, U) {L3TagArrayWrite} {
+    dt_deallocateTBE;
+    wa_wakeUpAllDependents;
+    pr_popResponseQueue;
+  }
+
+  transition({B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P}, {VicDirty, VicClean}) {
+    z_stall;
+  }
+
+  transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, WBAck) {
+    pm_popMemQueue;
+  }
+
+  transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, PF_Repl) {
+    zz_recycleRequestQueue;
+  }
+
+  transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, StaleVicDirty) {
+    rv_removeVicDirtyIgnore;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition({B}, CoreUnblock, U) {
+    wa_wakeUpDependents;
+    pu_popUnblockQueue;
+  }
+
+  transition(B, UnblockWriteThrough, U) {
+    wa_wakeUpDependents;
+    pt_popTriggerQueue;
+  }
+
+  transition(BS_PM, MemData, BS_Pm) {} {
+    mt_writeMemDataToTBE;
+    pm_popMemQueue;
+  }
+
+  transition(BM_PM, MemData, BM_Pm){} {
+    mt_writeMemDataToTBE;
+    pm_popMemQueue;
+  }
+
+  transition(B_PM, MemData, B_Pm){} {
+    mt_writeMemDataToTBE;
+    pm_popMemQueue;
+  }
+
+  transition(BS_PM, L3Hit, BS_Pm) {} {
+    ptl_popTriggerQueue;
+  }
+
+  transition(BM_PM, L3Hit, BM_Pm) {} {
+    ptl_popTriggerQueue;
+  }
+
+  transition(B_PM, L3Hit, B_Pm) {} {
+    ptl_popTriggerQueue;
+  }
+
+  transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+    mt_writeMemDataToTBE;
+    s_sendResponseS;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pm_popMemQueue;
+  }
+
+  transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+    mt_writeMemDataToTBE;
+    m_sendResponseM;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pm_popMemQueue;
+  }
+
+  transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
+    mt_writeMemDataToTBE;
+    es_sendResponseES;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pm_popMemQueue;
+  }
+
+  transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
+    s_sendResponseS;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    ptl_popTriggerQueue;
+  }
+
+  transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+    m_sendResponseM;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    ptl_popTriggerQueue;
+  }
+
+  transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
+    es_sendResponseES;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    ptl_popTriggerQueue;
+  }
+
+  transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, BP}, CPUPrbResp) {
+    y_writeProbeDataToTBE;
+    x_decrementAcks;
+    o_checkForCompletion;
+    pr_popResponseQueue;
+  }
+
+  transition(BS_PM, ProbeAcksComplete, BS_M) {} {
+    sf_setForwardReqTime;
+    pt_popTriggerQueue;
+  }
+
+  transition(BM_PM, ProbeAcksComplete, BM_M) {} {
+    sf_setForwardReqTime;
+    pt_popTriggerQueue;
+  }
+
+  transition(B_PM, ProbeAcksComplete, B_M){} {
+    sf_setForwardReqTime;
+    pt_popTriggerQueue;
+  }
+
+  transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+    sf_setForwardReqTime;
+    s_sendResponseS;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+    sf_setForwardReqTime;
+    m_sendResponseM;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
+    sf_setForwardReqTime;
+    es_sendResponseES;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(B_P, ProbeAcksComplete, U) {
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    we_wakeUpEvictionDependents;
+    dpf_deallocateProbeFilter;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+
+  transition(BP, ProbeAcksComplete, B){L3TagArrayWrite, L3TagArrayWrite} {
+    sf_setForwardReqTime;
+    c_sendResponseCtoD;
+    wd_writeBackData;
+    alwt_allocateL3BlockOnWT;
+    dt_deallocateTBE;
+    pt_popTriggerQueue;
+  }
+}
diff --git a/src/mem/protocol/MOESI_AMD_Base.slicc b/src/mem/protocol/MOESI_AMD_Base.slicc
new file mode 100644
index 000000000..b38145246
--- /dev/null
+++ b/src/mem/protocol/MOESI_AMD_Base.slicc
@@ -0,0 +1,6 @@
+protocol "MOESI_AMD_Base";
+include "RubySlicc_interfaces.slicc";
+include "MOESI_AMD_Base-msg.sm";
+include "MOESI_AMD_Base-CorePair.sm";
+include "MOESI_AMD_Base-L3cache.sm";
+include "MOESI_AMD_Base-dir.sm";
diff --git a/src/mem/protocol/RubySlicc_ComponentMapping.sm b/src/mem/protocol/RubySlicc_ComponentMapping.sm
index a72492b42..e1d7c4399 100644
--- a/src/mem/protocol/RubySlicc_ComponentMapping.sm
+++ b/src/mem/protocol/RubySlicc_ComponentMapping.sm
@@ -37,7 +37,10 @@ MachineID mapAddressToRange(Addr addr, MachineType type,
 NetDest broadcast(MachineType type);
 MachineID map_Address_to_DMA(Addr addr);
 MachineID map_Address_to_Directory(Addr addr);
+MachineID map_Address_to_RegionDir(Addr addr);
 NodeID map_Address_to_DirectoryNode(Addr addr);
+MachineID map_Address_to_TCCdir(Addr addr);
+NodeID map_Address_to_TCCdirNode(Addr addr);
 NodeID machineIDToNodeID(MachineID machID);
 NodeID machineIDToVersion(MachineID machID);
 MachineType machineIDToMachineType(MachineID machID);
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index 5ee26d65c..c743ebe28 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -62,7 +62,7 @@ bool testAndWrite(Addr addr, DataBlock datablk, Packet *pkt);
 
 // AccessPermission
 // The following five states define the access permission of all memory blocks.
-// These permissions have multiple uses.  They coordinate locking and 
+// These permissions have multiple uses.  They coordinate locking and
 // synchronization primitives, as well as enable functional accesses.
 // One should not need to add any additional permission values and it is very
 // risky to do so.
@@ -73,7 +73,7 @@ enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent")
   Read_Write, desc="block is Read/Write";
 
   // Possibly Invalid data
-  // The maybe stale permission indicates that accordingly to the protocol, 
+  // The maybe stale permission indicates that accordingly to the protocol,
   // there is no guarantee the block contains valid data.  However, functional
   // writes should update the block because a dataless PUT request may
   // revalidate the block's data.
@@ -227,6 +227,13 @@ enumeration(MachineType, desc="...", default="MachineType_NULL") {
     Collector,   desc="Collector Mach";
     L1Cache_wCC, desc="L1 Cache Mach to track cache-to-cache transfer (used for miss latency profile)";
     L2Cache_wCC, desc="L2 Cache Mach to track cache-to-cache transfer (used for miss latency profile)";
+    CorePair,    desc="Cache Mach (2 cores, Private L1Ds, Shared L1I & L2)";
+    TCP,         desc="GPU L1 Data Cache (Texture Cache per Pipe)";
+    TCC,         desc="GPU L2 Shared Cache (Texture Cache per Channel)";
+    TCCdir,      desc="Directory at the GPU L2 Cache (TCC)";
+    SQC,         desc="GPU L1 Instr Cache (Sequencer Cache)";
+    RegionDir,   desc="Region-granular directory";
+    RegionBuffer,desc="Region buffer for CPU and GPU";
     NULL,        desc="null mach type";
 }
 
diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm
index a6c57e1b0..b8d284725 100644
--- a/src/mem/protocol/RubySlicc_Types.sm
+++ b/src/mem/protocol/RubySlicc_Types.sm
@@ -31,8 +31,8 @@
 
 //
 // **PLEASE NOTE!**  When adding objects to this file you must also add a line
-// in the src/mem/ruby/SConscript file.  Otherwise the external object's .hh 
-// file will not be copied to the protocol directory and you will encounter a 
+// in the src/mem/ruby/SConscript file.  Otherwise the external object's .hh
+// file will not be copied to the protocol directory and you will encounter a
 // undefined declaration error.
 //
 
@@ -95,6 +95,8 @@ structure (NetDest, external = "yes", non_obj="yes") {
   bool intersectionIsEmpty(Set);
   bool intersectionIsEmpty(NetDest);
   MachineID smallestElement(MachineType);
+  NetDest OR(NetDest);
+  NetDest AND(NetDest);
 }
 
 structure (Sequencer, external = "yes") {
@@ -117,6 +119,44 @@ structure (Sequencer, external = "yes") {
   void invalidateSC(Addr);
 }
 
+structure (GPUCoalescer, external = "yes") {
+  void readCallback(Addr, DataBlock);
+  void readCallback(Addr, MachineType, DataBlock);
+  void readCallback(Addr, MachineType, DataBlock,
+                    Cycles, Cycles, Cycles);
+  void readCallback(Addr, MachineType, DataBlock,
+                    Cycles, Cycles, Cycles, bool);
+  void writeCallback(Addr, DataBlock);
+  void writeCallback(Addr, MachineType, DataBlock);
+  void writeCallback(Addr, MachineType, DataBlock,
+                     Cycles, Cycles, Cycles);
+  void writeCallback(Addr, MachineType, DataBlock,
+                     Cycles, Cycles, Cycles, bool);
+  void checkCoherence(Addr);
+  void evictionCallback(Addr);
+  void recordCPReadCallBack(MachineID, MachineID);
+  void recordCPWriteCallBack(MachineID, MachineID);
+}
+
+structure (VIPERCoalescer, external = "yes") {
+  void readCallback(Addr, DataBlock);
+  void readCallback(Addr, MachineType, DataBlock);
+  void readCallback(Addr, MachineType, DataBlock,
+                    Cycles, Cycles, Cycles);
+  void readCallback(Addr, MachineType, DataBlock,
+                    Cycles, Cycles, Cycles, bool);
+  void writeCallback(Addr, DataBlock);
+  void writeCallback(Addr, MachineType, DataBlock);
+  void writeCallback(Addr, MachineType, DataBlock,
+                     Cycles, Cycles, Cycles);
+  void writeCallback(Addr, MachineType, DataBlock,
+                     Cycles, Cycles, Cycles, bool);
+  void invCallback(Addr);
+  void wbCallback(Addr);
+  void checkCoherence(Addr);
+  void evictionCallback(Addr);
+}
+
 structure(RubyRequest, desc="...", interface="Message", external="yes") {
   Addr LineAddress,       desc="Line address for this request";
   Addr PhysicalAddress,   desc="Physical address for this request";
@@ -161,6 +201,7 @@ structure (CacheMemory, external = "yes") {
   Cycles getTagLatency();
   Cycles getDataLatency();
   void setMRU(Addr);
+  void setMRU(Addr, int);
   void setMRU(AbstractCacheEntry);
   void recordRequestType(CacheRequestType, Addr);
   bool checkResourceAvailable(CacheResourceType, Addr);
diff --git a/src/mem/protocol/SConsopts b/src/mem/protocol/SConsopts
index ca432a73e..47b36e276 100644
--- a/src/mem/protocol/SConsopts
+++ b/src/mem/protocol/SConsopts
@@ -33,6 +33,11 @@ import os
 Import('*')
 
 all_protocols.extend([
+    'GPU_VIPER',
+    'GPU_VIPER_Baseline',
+    'GPU_VIPER_Region',
+    'GPU_RfO',
+    'MOESI_AMD_Base',
     'MESI_Two_Level',
     'MESI_Three_Level',
     'MI_example',
diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript
index 16e932432..82a16c9b0 100644
--- a/src/mem/ruby/SConscript
+++ b/src/mem/ruby/SConscript
@@ -124,13 +124,20 @@ MakeInclude('common/Set.hh')
 MakeInclude('common/WriteMask.hh')
 MakeInclude('filters/AbstractBloomFilter.hh')
 MakeInclude('network/MessageBuffer.hh')
-MakeInclude('structures/Prefetcher.hh')
 MakeInclude('structures/CacheMemory.hh')
-MakeInclude('system/DMASequencer.hh')
 MakeInclude('structures/DirectoryMemory.hh')
-MakeInclude('structures/WireBuffer.hh')
 MakeInclude('structures/PerfectCacheMemory.hh')
 MakeInclude('structures/PersistentTable.hh')
-MakeInclude('system/Sequencer.hh')
+MakeInclude('structures/Prefetcher.hh')
 MakeInclude('structures/TBETable.hh')
 MakeInclude('structures/TimerTable.hh')
+MakeInclude('structures/WireBuffer.hh')
+MakeInclude('system/DMASequencer.hh')
+MakeInclude('system/Sequencer.hh')
+
+# External types : Group "mem/protocol" : include "header.hh" to the bottom
+# of this MakeIncludes if it is referenced as
+# <# include "mem/protocol/header.hh"> in any file
+# generated_dir = Dir('../protocol')
+MakeInclude('system/GPUCoalescer.hh')
+MakeInclude('system/VIPERCoalescer.hh')
diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc
index b3b37e5a6..7d3f20982 100644
--- a/src/mem/ruby/profiler/Profiler.cc
+++ b/src/mem/ruby/profiler/Profiler.cc
@@ -269,7 +269,7 @@ Profiler::collateStats()
                 it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
 
             AbstractController *ctr = (*it).second;
-            Sequencer *seq = ctr->getSequencer();
+            Sequencer *seq = ctr->getCPUSequencer();
             if (seq != NULL) {
                 m_outstandReqHist.add(seq->getOutstandReqHist());
             }
@@ -282,7 +282,7 @@ Profiler::collateStats()
                 it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
 
             AbstractController *ctr = (*it).second;
-            Sequencer *seq = ctr->getSequencer();
+            Sequencer *seq = ctr->getCPUSequencer();
             if (seq != NULL) {
                 // add all the latencies
                 m_latencyHist.add(seq->getLatencyHist());
diff --git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh
index 926556781..cbd068c04 100644
--- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh
+++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh
@@ -56,6 +56,12 @@ class AbstractCacheEntry : public AbstractEntry
     virtual DataBlock& getDataBlk()
     { panic("getDataBlk() not implemented!"); }
 
+    int validBlocks;
+    virtual int& getNumValidBlocks()
+    {
+        return validBlocks;
+    }
+
     // Functions for locking and unlocking the cache entry.  These are required
     // for supporting atomic memory accesses.
     void setLocked(int context);
diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc
index 93fe50c88..458fde5bc 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.cc
+++ b/src/mem/ruby/slicc_interface/AbstractController.cc
@@ -200,6 +200,12 @@ AbstractController::unblock(Addr addr)
     }
 }
 
+bool
+AbstractController::isBlocked(Addr addr)
+{
+    return (m_block_map.count(addr) > 0);
+}
+
 BaseMasterPort &
 AbstractController::getMasterPort(const std::string &if_name,
                                   PortID idx)
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index 383507eed..4488ee3f4 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -73,6 +73,7 @@ class AbstractController : public MemObject, public Consumer
     // return instance name
     void blockOnQueue(Addr, MessageBuffer*);
     void unblock(Addr);
+    bool isBlocked(Addr);
 
     virtual MessageBuffer* getMandatoryQueue() const = 0;
     virtual MessageBuffer* getMemoryQueue() const = 0;
@@ -84,7 +85,7 @@ class AbstractController : public MemObject, public Consumer
     virtual void regStats();
 
     virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0;
-    virtual Sequencer* getSequencer() const = 0;
+    virtual Sequencer* getCPUSequencer() const = 0;
 
     //! These functions are used by ruby system to read/write the data blocks
     //! that exist with in the controller.
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
index 46071335e..cdedc2e14 100644
--- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
+++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
@@ -43,6 +43,12 @@ map_Address_to_DirectoryNode(Addr addr)
     return DirectoryMemory::mapAddressToDirectoryVersion(addr);
 }
 
+inline NodeID
+map_Address_to_TCCdirNode(Addr addr)
+{
+    return DirectoryMemory::mapAddressToDirectoryVersion(addr);
+}
+
 // used to determine the home directory
 // returns a value between 0 and total_directories_within_the_system
 inline MachineID
@@ -53,6 +59,22 @@ map_Address_to_Directory(Addr addr)
     return mach;
 }
 
+inline MachineID
+map_Address_to_RegionDir(Addr addr)
+{
+    MachineID mach = {MachineType_RegionDir,
+                      map_Address_to_DirectoryNode(addr)};
+    return mach;
+}
+
+inline MachineID
+map_Address_to_TCCdir(Addr addr)
+{
+    MachineID mach =
+        {MachineType_TCCdir, map_Address_to_TCCdirNode(addr)};
+    return mach;
+}
+
 inline NetDest
 broadcast(MachineType type)
 {
@@ -102,4 +124,11 @@ createMachineID(MachineType type, NodeID id)
     return mach;
 }
 
+inline MachineID
+MachineTypeAndNodeIDToMachineID(MachineType type, NodeID node)
+{
+    MachineID mach = {type, node};
+    return mach;
+}
+
 #endif  // __MEM_RUBY_SLICC_INTERFACE_COMPONENTMAPPINGS_HH__
diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc
index a8a3ba949..45fb85d05 100644
--- a/src/mem/ruby/structures/CacheMemory.cc
+++ b/src/mem/ruby/structures/CacheMemory.cc
@@ -35,6 +35,7 @@
 #include "mem/protocol/AccessPermission.hh"
 #include "mem/ruby/structures/CacheMemory.hh"
 #include "mem/ruby/system/RubySystem.hh"
+#include "mem/ruby/system/WeightedLRUPolicy.hh"
 
 using namespace std;
 
@@ -66,29 +67,27 @@ CacheMemory::CacheMemory(const Params *p)
     m_start_index_bit = p->start_index_bit;
     m_is_instruction_only_cache = p->is_icache;
     m_resource_stalls = p->resourceStalls;
+    m_block_size = p->block_size;  // may be 0 at this point. Updated in init()
 }
 
 void
 CacheMemory::init()
 {
-    m_cache_num_sets = (m_cache_size / m_cache_assoc) /
-        RubySystem::getBlockSizeBytes();
+    if (m_block_size == 0) {
+        m_block_size = RubySystem::getBlockSizeBytes();
+    }
+    m_cache_num_sets = (m_cache_size / m_cache_assoc) / m_block_size;
     assert(m_cache_num_sets > 1);
     m_cache_num_set_bits = floorLog2(m_cache_num_sets);
     assert(m_cache_num_set_bits > 0);
 
-    m_cache.resize(m_cache_num_sets);
-    for (int i = 0; i < m_cache_num_sets; i++) {
-        m_cache[i].resize(m_cache_assoc);
-        for (int j = 0; j < m_cache_assoc; j++) {
-            m_cache[i][j] = NULL;
-        }
-    }
+    m_cache.resize(m_cache_num_sets,
+                    std::vector<AbstractCacheEntry*>(m_cache_assoc, nullptr));
 }
 
 CacheMemory::~CacheMemory()
 {
-    if (m_replacementPolicy_ptr != NULL)
+    if (m_replacementPolicy_ptr)
         delete m_replacementPolicy_ptr;
     for (int i = 0; i < m_cache_num_sets; i++) {
         for (int j = 0; j < m_cache_assoc; j++) {
@@ -359,6 +358,37 @@ CacheMemory::setMRU(const AbstractCacheEntry *e)
 }
 
 void
+CacheMemory::setMRU(Addr address, int occupancy)
+{
+    int64_t cacheSet = addressToCacheSet(address);
+    int loc = findTagInSet(cacheSet, address);
+
+    if(loc != -1) {
+        if (m_replacementPolicy_ptr->useOccupancy()) {
+            (static_cast<WeightedLRUPolicy*>(m_replacementPolicy_ptr))->
+                touch(cacheSet, loc, curTick(), occupancy);
+        } else {
+            m_replacementPolicy_ptr->
+                touch(cacheSet, loc, curTick());
+        }
+    }
+}
+
+int
+CacheMemory::getReplacementWeight(int64_t set, int64_t loc)
+{
+    assert(set < m_cache_num_sets);
+    assert(loc < m_cache_assoc);
+    int ret = 0;
+    if(m_cache[set][loc] != NULL) {
+        ret = m_cache[set][loc]->getNumValidBlocks();
+        assert(ret >= 0);
+    }
+
+    return ret;
+}
+
+void
 CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const
 {
     uint64_t warmedUpBlocks = 0;
diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh
index 72805b32b..5b30505d3 100644
--- a/src/mem/ruby/structures/CacheMemory.hh
+++ b/src/mem/ruby/structures/CacheMemory.hh
@@ -106,7 +106,8 @@ class CacheMemory : public SimObject
 
     // Set this address to most recently used
     void setMRU(Addr address);
-    // Set this entry to most recently used
+    void setMRU(Addr addr, int occupancy);
+    int getReplacementWeight(int64_t set, int64_t loc);
     void setMRU(const AbstractCacheEntry *e);
 
     // Functions for locking and unlocking cache lines corresponding to the
@@ -146,6 +147,7 @@ class CacheMemory : public SimObject
     Stats::Scalar numDataArrayStalls;
 
     int getCacheSize() const { return m_cache_size; }
+    int getCacheAssoc() const { return m_cache_assoc; }
     int getNumBlocks() const { return m_cache_num_sets * m_cache_assoc; }
     Addr getAddressAtIdx(int idx) const;
 
@@ -182,6 +184,7 @@ class CacheMemory : public SimObject
     int m_cache_assoc;
     int m_start_index_bit;
     bool m_resource_stalls;
+    int m_block_size;
 };
 
 std::ostream& operator<<(std::ostream& out, const CacheMemory& obj);
diff --git a/src/mem/ruby/structures/RubyCache.py b/src/mem/ruby/structures/RubyCache.py
index 4eb87ac74..9fc4726b0 100644
--- a/src/mem/ruby/structures/RubyCache.py
+++ b/src/mem/ruby/structures/RubyCache.py
@@ -42,6 +42,7 @@ class RubyCache(SimObject):
                          "")
     start_index_bit = Param.Int(6, "index start, default 6 for 64-byte line");
     is_icache = Param.Bool(False, "is instruction only cache");
+    block_size = Param.MemorySize("0B", "block size in bytes. 0 means default RubyBlockSize")
 
     dataArrayBanks = Param.Int(1, "Number of banks for the data array")
     tagArrayBanks = Param.Int(1, "Number of banks for the tag array")
diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc
new file mode 100644
index 000000000..db279bd3a
--- /dev/null
+++ b/src/mem/ruby/system/GPUCoalescer.cc
@@ -0,0 +1,1397 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#include "base/misc.hh"
+#include "base/str.hh"
+#include "config/the_isa.hh"
+
+#if THE_ISA == X86_ISA
+#include "arch/x86/insts/microldstop.hh"
+
+#endif // X86_ISA
+#include "mem/ruby/system/GPUCoalescer.hh"
+
+#include "cpu/testers/rubytest/RubyTester.hh"
+#include "debug/GPUCoalescer.hh"
+#include "debug/MemoryAccess.hh"
+#include "debug/ProtocolTrace.hh"
+#include "debug/RubyPort.hh"
+#include "debug/RubyStats.hh"
+#include "gpu-compute/shader.hh"
+#include "mem/packet.hh"
+#include "mem/ruby/common/DataBlock.hh"
+#include "mem/ruby/common/SubBlock.hh"
+#include "mem/ruby/network/MessageBuffer.hh"
+#include "mem/ruby/profiler/Profiler.hh"
+#include "mem/ruby/slicc_interface/AbstractController.hh"
+#include "mem/ruby/slicc_interface/RubyRequest.hh"
+#include "mem/ruby/structures/CacheMemory.hh"
+#include "mem/ruby/system/RubySystem.hh"
+#include "params/RubyGPUCoalescer.hh"
+
+using namespace std;
+
+GPUCoalescer *
+RubyGPUCoalescerParams::create()
+{
+    return new GPUCoalescer(this);
+}
+
+HSAScope
+reqScopeToHSAScope(Request* req)
+{
+    HSAScope accessScope = HSAScope_UNSPECIFIED;
+    if (req->isScoped()) {
+        if (req->isWavefrontScope()) {
+            accessScope = HSAScope_WAVEFRONT;
+        } else if (req->isWorkgroupScope()) {
+            accessScope = HSAScope_WORKGROUP;
+        } else if (req->isDeviceScope()) {
+            accessScope = HSAScope_DEVICE;
+        } else if (req->isSystemScope()) {
+            accessScope = HSAScope_SYSTEM;
+        } else {
+            fatal("Bad scope type");
+        }
+    }
+    return accessScope;
+}
+
+HSASegment
+reqSegmentToHSASegment(Request* req)
+{
+    HSASegment accessSegment = HSASegment_GLOBAL;
+
+    if (req->isGlobalSegment()) {
+        accessSegment = HSASegment_GLOBAL;
+    } else if (req->isGroupSegment()) {
+        accessSegment = HSASegment_GROUP;
+    } else if (req->isPrivateSegment()) {
+        accessSegment = HSASegment_PRIVATE;
+    } else if (req->isKernargSegment()) {
+        accessSegment = HSASegment_KERNARG;
+    } else if (req->isReadonlySegment()) {
+        accessSegment = HSASegment_READONLY;
+    } else if (req->isSpillSegment()) {
+        accessSegment = HSASegment_SPILL;
+    } else if (req->isArgSegment()) {
+        accessSegment = HSASegment_ARG;
+    } else {
+        fatal("Bad segment type");
+    }
+
+    return accessSegment;
+}
+
+GPUCoalescer::GPUCoalescer(const Params *p)
+    : RubyPort(p), issueEvent(this), deadlockCheckEvent(this)
+{
+    m_store_waiting_on_load_cycles = 0;
+    m_store_waiting_on_store_cycles = 0;
+    m_load_waiting_on_store_cycles = 0;
+    m_load_waiting_on_load_cycles = 0;
+
+    m_outstanding_count = 0;
+
+    m_max_outstanding_requests = 0;
+    m_deadlock_threshold = 0;
+    m_instCache_ptr = nullptr;
+    m_dataCache_ptr = nullptr;
+
+    m_instCache_ptr = p->icache;
+    m_dataCache_ptr = p->dcache;
+    m_max_outstanding_requests = p->max_outstanding_requests;
+    m_deadlock_threshold = p->deadlock_threshold;
+
+    assert(m_max_outstanding_requests > 0);
+    assert(m_deadlock_threshold > 0);
+    assert(m_instCache_ptr);
+    assert(m_dataCache_ptr);
+
+    m_data_cache_hit_latency = p->dcache_hit_latency;
+
+    m_usingNetworkTester = p->using_network_tester;
+    assumingRfOCoherence = p->assume_rfo;
+}
+
+GPUCoalescer::~GPUCoalescer()
+{
+}
+
+void
+GPUCoalescer::wakeup()
+{
+    // Check for deadlock of any of the requests
+    Cycles current_time = curCycle();
+
+    // Check across all outstanding requests
+    int total_outstanding = 0;
+
+    RequestTable::iterator read = m_readRequestTable.begin();
+    RequestTable::iterator read_end = m_readRequestTable.end();
+    for (; read != read_end; ++read) {
+        GPUCoalescerRequest* request = read->second;
+        if (current_time - request->issue_time < m_deadlock_threshold)
+            continue;
+
+        panic("Possible Deadlock detected. Aborting!\n"
+             "version: %d request.paddr: 0x%x m_readRequestTable: %d "
+             "current time: %u issue_time: %d difference: %d\n", m_version,
+              request->pkt->getAddr(), m_readRequestTable.size(),
+              current_time * clockPeriod(), request->issue_time * clockPeriod(),
+              (current_time - request->issue_time)*clockPeriod());
+    }
+
+    RequestTable::iterator write = m_writeRequestTable.begin();
+    RequestTable::iterator write_end = m_writeRequestTable.end();
+    for (; write != write_end; ++write) {
+        GPUCoalescerRequest* request = write->second;
+        if (current_time - request->issue_time < m_deadlock_threshold)
+            continue;
+
+        panic("Possible Deadlock detected. Aborting!\n"
+             "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
+             "current time: %u issue_time: %d difference: %d\n", m_version,
+              request->pkt->getAddr(), m_writeRequestTable.size(),
+              current_time * clockPeriod(), request->issue_time * clockPeriod(),
+              (current_time - request->issue_time) * clockPeriod());
+    }
+
+    total_outstanding += m_writeRequestTable.size();
+    total_outstanding += m_readRequestTable.size();
+
+    assert(m_outstanding_count == total_outstanding);
+
+    if (m_outstanding_count > 0) {
+        // If there are still outstanding requests, keep checking
+        schedule(deadlockCheckEvent,
+                 m_deadlock_threshold * clockPeriod() +
+                 curTick());
+    }
+}
+
+void
+GPUCoalescer::resetStats()
+{
+    m_latencyHist.reset();
+    m_missLatencyHist.reset();
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        m_typeLatencyHist[i]->reset();
+        m_missTypeLatencyHist[i]->reset();
+        for (int j = 0; j < MachineType_NUM; j++) {
+            m_missTypeMachLatencyHist[i][j]->reset();
+        }
+    }
+
+    for (int i = 0; i < MachineType_NUM; i++) {
+        m_missMachLatencyHist[i]->reset();
+
+        m_IssueToInitialDelayHist[i]->reset();
+        m_InitialToForwardDelayHist[i]->reset();
+        m_ForwardToFirstResponseDelayHist[i]->reset();
+        m_FirstResponseToCompletionDelayHist[i]->reset();
+    }
+}
+
+void
+GPUCoalescer::printProgress(ostream& out) const
+{
+}
+
+RequestStatus
+GPUCoalescer::getRequestStatus(PacketPtr pkt, RubyRequestType request_type)
+{
+    Addr line_addr = makeLineAddress(pkt->getAddr());
+
+    if (!m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge())) {
+        return RequestStatus_BufferFull;
+    }
+
+    if(m_controller->isBlocked(line_addr) &&
+       request_type != RubyRequestType_Locked_RMW_Write) {
+        return RequestStatus_Aliased;
+    }
+
+    if ((request_type == RubyRequestType_ST) ||
+        (request_type == RubyRequestType_ATOMIC) ||
+        (request_type == RubyRequestType_ATOMIC_RETURN) ||
+        (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
+        (request_type == RubyRequestType_RMW_Read) ||
+        (request_type == RubyRequestType_RMW_Write) ||
+        (request_type == RubyRequestType_Load_Linked) ||
+        (request_type == RubyRequestType_Store_Conditional) ||
+        (request_type == RubyRequestType_Locked_RMW_Read) ||
+        (request_type == RubyRequestType_Locked_RMW_Write) ||
+        (request_type == RubyRequestType_FLUSH)) {
+
+        // Check if there is any outstanding read request for the same
+        // cache line.
+        if (m_readRequestTable.count(line_addr) > 0) {
+            m_store_waiting_on_load_cycles++;
+            return RequestStatus_Aliased;
+        }
+
+        if (m_writeRequestTable.count(line_addr) > 0) {
+          // There is an outstanding write request for the cache line
+          m_store_waiting_on_store_cycles++;
+          return RequestStatus_Aliased;
+        }
+    } else {
+        // Check if there is any outstanding write request for the same
+        // cache line.
+        if (m_writeRequestTable.count(line_addr) > 0) {
+            m_load_waiting_on_store_cycles++;
+            return RequestStatus_Aliased;
+        }
+
+        if (m_readRequestTable.count(line_addr) > 0) {
+            // There is an outstanding read request for the cache line
+            m_load_waiting_on_load_cycles++;
+            return RequestStatus_Aliased;
+        }
+    }
+
+    return RequestStatus_Ready;
+
+}
+
+
+
+// sets the kernelEndList
+void
+GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
+{
+    // Don't know if this will happen or is possible
+    // but I just want to be careful and not have it become
+    // simulator hang in the future
+    DPRINTF(GPUCoalescer, "inserting wf: %d to kernelEndlist\n", wavefront_id);
+    assert(kernelEndList.count(wavefront_id) == 0);
+
+    kernelEndList[wavefront_id] = pkt;
+    DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n",
+            kernelEndList.size());
+}
+
+
+// Insert the request on the correct request table.  Return true if
+// the entry was already present.
+bool
+GPUCoalescer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
+{
+    assert(getRequestStatus(pkt, request_type) == RequestStatus_Ready ||
+           pkt->req->isLockedRMW() ||
+           !m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge()));
+
+    int total_outstanding M5_VAR_USED =
+        m_writeRequestTable.size() + m_readRequestTable.size();
+
+    assert(m_outstanding_count == total_outstanding);
+
+    // See if we should schedule a deadlock check
+    if (deadlockCheckEvent.scheduled() == false) {
+        schedule(deadlockCheckEvent, m_deadlock_threshold + curTick());
+    }
+
+    Addr line_addr = makeLineAddress(pkt->getAddr());
+    if ((request_type == RubyRequestType_ST) ||
+        (request_type == RubyRequestType_ATOMIC) ||
+        (request_type == RubyRequestType_ATOMIC_RETURN) ||
+        (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
+        (request_type == RubyRequestType_RMW_Read) ||
+        (request_type == RubyRequestType_RMW_Write) ||
+        (request_type == RubyRequestType_Load_Linked) ||
+        (request_type == RubyRequestType_Store_Conditional) ||
+        (request_type == RubyRequestType_Locked_RMW_Read) ||
+        (request_type == RubyRequestType_Locked_RMW_Write) ||
+        (request_type == RubyRequestType_FLUSH)) {
+
+        pair<RequestTable::iterator, bool> r =
+          m_writeRequestTable.insert(RequestTable::value_type(line_addr,
+                                       (GPUCoalescerRequest*) NULL));
+        if (r.second) {
+            RequestTable::iterator i = r.first;
+            i->second = new GPUCoalescerRequest(pkt, request_type,
+                                                curCycle());
+            DPRINTF(GPUCoalescer,
+                    "Inserting write request for paddr %#x for type %d\n",
+                    pkt->req->getPaddr(), i->second->m_type);
+            m_outstanding_count++;
+        } else {
+            return true;
+        }
+    } else {
+        pair<RequestTable::iterator, bool> r =
+            m_readRequestTable.insert(RequestTable::value_type(line_addr,
+                                        (GPUCoalescerRequest*) NULL));
+
+        if (r.second) {
+            RequestTable::iterator i = r.first;
+            i->second = new GPUCoalescerRequest(pkt, request_type,
+                                             curCycle());
+            DPRINTF(GPUCoalescer,
+                    "Inserting read request for paddr %#x for type %d\n",
+                    pkt->req->getPaddr(), i->second->m_type);
+            m_outstanding_count++;
+        } else {
+            return true;
+        }
+    }
+
+    m_outstandReqHist.sample(m_outstanding_count);
+
+    total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size();
+    assert(m_outstanding_count == total_outstanding);
+
+    return false;
+}
+
+void
+GPUCoalescer::markRemoved()
+{
+    m_outstanding_count--;
+    assert(m_outstanding_count ==
+           m_writeRequestTable.size() + m_readRequestTable.size());
+}
+
+void
+GPUCoalescer::removeRequest(GPUCoalescerRequest* srequest)
+{
+    assert(m_outstanding_count ==
+           m_writeRequestTable.size() + m_readRequestTable.size());
+
+    Addr line_addr = makeLineAddress(srequest->pkt->getAddr());
+    if ((srequest->m_type == RubyRequestType_ST) ||
+        (srequest->m_type == RubyRequestType_RMW_Read) ||
+        (srequest->m_type == RubyRequestType_RMW_Write) ||
+        (srequest->m_type == RubyRequestType_Load_Linked) ||
+        (srequest->m_type == RubyRequestType_Store_Conditional) ||
+        (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
+        (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {
+        m_writeRequestTable.erase(line_addr);
+    } else {
+        m_readRequestTable.erase(line_addr);
+    }
+
+    markRemoved();
+}
+
+bool
+GPUCoalescer::handleLlsc(Addr address, GPUCoalescerRequest* request)
+{
+    //
+    // The success flag indicates whether the LLSC operation was successful.
+    // LL ops will always succeed, but SC may fail if the cache line is no
+    // longer locked.
+    //
+    bool success = true;
+    if (request->m_type == RubyRequestType_Store_Conditional) {
+        if (!m_dataCache_ptr->isLocked(address, m_version)) {
+            //
+            // For failed SC requests, indicate the failure to the cpu by
+            // setting the extra data to zero.
+            //
+            request->pkt->req->setExtraData(0);
+            success = false;
+        } else {
+            //
+            // For successful SC requests, indicate the success to the cpu by
+            // setting the extra data to one.
+            //
+            request->pkt->req->setExtraData(1);
+        }
+        //
+        // Independent of success, all SC operations must clear the lock
+        //
+        m_dataCache_ptr->clearLocked(address);
+    } else if (request->m_type == RubyRequestType_Load_Linked) {
+        //
+        // Note: To fully follow Alpha LLSC semantics, should the LL clear any
+        // previously locked cache lines?
+        //
+        m_dataCache_ptr->setLocked(address, m_version);
+    } else if ((m_dataCache_ptr->isTagPresent(address)) &&
+               (m_dataCache_ptr->isLocked(address, m_version))) {
+        //
+        // Normal writes should clear the locked address
+        //
+        m_dataCache_ptr->clearLocked(address);
+    }
+    return success;
+}
+
+void
+GPUCoalescer::writeCallback(Addr address, DataBlock& data)
+{
+    writeCallback(address, MachineType_NULL, data);
+}
+
+void
+GPUCoalescer::writeCallback(Addr address,
+                         MachineType mach,
+                         DataBlock& data)
+{
+    writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
+}
+
+void
+GPUCoalescer::writeCallback(Addr address,
+                         MachineType mach,
+                         DataBlock& data,
+                         Cycles initialRequestTime,
+                         Cycles forwardRequestTime,
+                         Cycles firstResponseTime)
+{
+    writeCallback(address, mach, data,
+                  initialRequestTime, forwardRequestTime, firstResponseTime,
+                  false);
+}
+
+void
+GPUCoalescer::writeCallback(Addr address,
+                         MachineType mach,
+                         DataBlock& data,
+                         Cycles initialRequestTime,
+                         Cycles forwardRequestTime,
+                         Cycles firstResponseTime,
+                         bool isRegion)
+{
+    assert(address == makeLineAddress(address));
+
+    DPRINTF(GPUCoalescer, "write callback for address %#x\n", address);
+    assert(m_writeRequestTable.count(makeLineAddress(address)));
+
+    RequestTable::iterator i = m_writeRequestTable.find(address);
+    assert(i != m_writeRequestTable.end());
+    GPUCoalescerRequest* request = i->second;
+
+    m_writeRequestTable.erase(i);
+    markRemoved();
+
+    assert((request->m_type == RubyRequestType_ST) ||
+           (request->m_type == RubyRequestType_ATOMIC) ||
+           (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
+           (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
+           (request->m_type == RubyRequestType_RMW_Read) ||
+           (request->m_type == RubyRequestType_RMW_Write) ||
+           (request->m_type == RubyRequestType_Load_Linked) ||
+           (request->m_type == RubyRequestType_Store_Conditional) ||
+           (request->m_type == RubyRequestType_Locked_RMW_Read) ||
+           (request->m_type == RubyRequestType_Locked_RMW_Write) ||
+           (request->m_type == RubyRequestType_FLUSH));
+
+
+    //
+    // For Alpha, properly handle LL, SC, and write requests with respect to
+    // locked cache blocks.
+    //
+    // Not valid for Network_test protocl
+    //
+    bool success = true;
+    if(!m_usingNetworkTester)
+        success = handleLlsc(address, request);
+
+    if (request->m_type == RubyRequestType_Locked_RMW_Read) {
+        m_controller->blockOnQueue(address, m_mandatory_q_ptr);
+    } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
+        m_controller->unblock(address);
+    }
+
+    hitCallback(request, mach, data, success,
+                request->issue_time, forwardRequestTime, firstResponseTime,
+                isRegion);
+}
+
+void
+GPUCoalescer::readCallback(Addr address, DataBlock& data)
+{
+    readCallback(address, MachineType_NULL, data);
+}
+
+void
+GPUCoalescer::readCallback(Addr address,
+                        MachineType mach,
+                        DataBlock& data)
+{
+    readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
+}
+
+void
+GPUCoalescer::readCallback(Addr address,
+                        MachineType mach,
+                        DataBlock& data,
+                        Cycles initialRequestTime,
+                        Cycles forwardRequestTime,
+                        Cycles firstResponseTime)
+{
+
+    readCallback(address, mach, data,
+                 initialRequestTime, forwardRequestTime, firstResponseTime,
+                 false);
+}
+
+void
+GPUCoalescer::readCallback(Addr address,
+                        MachineType mach,
+                        DataBlock& data,
+                        Cycles initialRequestTime,
+                        Cycles forwardRequestTime,
+                        Cycles firstResponseTime,
+                        bool isRegion)
+{
+    assert(address == makeLineAddress(address));
+    assert(m_readRequestTable.count(makeLineAddress(address)));
+
+    DPRINTF(GPUCoalescer, "read callback for address %#x\n", address);
+    RequestTable::iterator i = m_readRequestTable.find(address);
+    assert(i != m_readRequestTable.end());
+    GPUCoalescerRequest* request = i->second;
+
+    m_readRequestTable.erase(i);
+    markRemoved();
+
+    assert((request->m_type == RubyRequestType_LD) ||
+           (request->m_type == RubyRequestType_IFETCH));
+
+    hitCallback(request, mach, data, true,
+                request->issue_time, forwardRequestTime, firstResponseTime,
+                isRegion);
+}
+
+void
+GPUCoalescer::hitCallback(GPUCoalescerRequest* srequest,
+                       MachineType mach,
+                       DataBlock& data,
+                       bool success,
+                       Cycles initialRequestTime,
+                       Cycles forwardRequestTime,
+                       Cycles firstResponseTime,
+                       bool isRegion)
+{
+    PacketPtr pkt = srequest->pkt;
+    Addr request_address = pkt->getAddr();
+    Addr request_line_address = makeLineAddress(request_address);
+
+    RubyRequestType type = srequest->m_type;
+
+    // Set this cache entry to the most recently used
+    if (type == RubyRequestType_IFETCH) {
+        if (m_instCache_ptr->isTagPresent(request_line_address))
+            m_instCache_ptr->setMRU(request_line_address);
+    } else {
+        if (m_dataCache_ptr->isTagPresent(request_line_address))
+            m_dataCache_ptr->setMRU(request_line_address);
+    }
+
+    recordMissLatency(srequest, mach,
+                      initialRequestTime,
+                      forwardRequestTime,
+                      firstResponseTime,
+                      success, isRegion);
+    // update the data
+    //
+    // MUST AD DOING THIS FOR EACH REQUEST IN COALESCER
+    int len = reqCoalescer[request_line_address].size();
+    std::vector<PacketPtr> mylist;
+    for (int i = 0; i < len; ++i) {
+        PacketPtr pkt = reqCoalescer[request_line_address][i].first;
+        assert(type ==
+               reqCoalescer[request_line_address][i].second[PrimaryType]);
+        request_address = pkt->getAddr();
+        request_line_address = makeLineAddress(pkt->getAddr());
+        if (pkt->getPtr<uint8_t>()) {
+            if ((type == RubyRequestType_LD) ||
+                (type == RubyRequestType_ATOMIC) ||
+                (type == RubyRequestType_ATOMIC_RETURN) ||
+                (type == RubyRequestType_IFETCH) ||
+                (type == RubyRequestType_RMW_Read) ||
+                (type == RubyRequestType_Locked_RMW_Read) ||
+                (type == RubyRequestType_Load_Linked)) {
+                memcpy(pkt->getPtr<uint8_t>(),
+                       data.getData(getOffset(request_address),
+                                    pkt->getSize()),
+                       pkt->getSize());
+            } else {
+                data.setData(pkt->getPtr<uint8_t>(),
+                             getOffset(request_address), pkt->getSize());
+            }
+        } else {
+            DPRINTF(MemoryAccess,
+                    "WARNING.  Data not transfered from Ruby to M5 for type " \
+                    "%s\n",
+                    RubyRequestType_to_string(type));
+        }
+
+        // If using the RubyTester, update the RubyTester sender state's
+        // subBlock with the recieved data.  The tester will later access
+        // this state.
+        // Note: RubyPort will access it's sender state before the
+        // RubyTester.
+        if (m_usingRubyTester) {
+            RubyPort::SenderState *requestSenderState =
+                safe_cast<RubyPort::SenderState*>(pkt->senderState);
+            RubyTester::SenderState* testerSenderState =
+                safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
+            testerSenderState->subBlock.mergeFrom(data);
+        }
+
+        mylist.push_back(pkt);
+    }
+    delete srequest;
+    reqCoalescer.erase(request_line_address);
+    assert(!reqCoalescer.count(request_line_address));
+
+
+
+    completeHitCallback(mylist, len);
+}
+
+bool
+GPUCoalescer::empty() const
+{
+    return m_writeRequestTable.empty() && m_readRequestTable.empty();
+}
+
+// Analyzes the packet to see if this request can be coalesced.
+// If request can be coalesced, this request is added to the reqCoalescer table
+// and makeRequest returns RequestStatus_Issued;
+// If this is the first request to a cacheline, request is added to both
+// newRequests queue and to the reqCoalescer table; makeRequest
+// returns RequestStatus_Issued.
+// If there is a pending request to this cacheline and this request
+// can't be coalesced, RequestStatus_Aliased is returned and
+// the packet needs to be reissued.
+RequestStatus
+GPUCoalescer::makeRequest(PacketPtr pkt)
+{
+    // Check for GPU Barrier Kernel End or Kernel Begin
+    // Leave these to be handled by the child class
+    // Kernel End/Barrier = isFlush + isRelease
+    // Kernel Begin = isFlush + isAcquire
+    if (pkt->req->isKernel()) {
+        if (pkt->req->isAcquire()){
+            // This is a Kernel Begin leave handling to
+            // virtual xCoalescer::makeRequest
+            return RequestStatus_Issued;
+        }else if(pkt->req->isRelease()) {
+            // This is a Kernel End leave handling to
+            // virtual xCoalescer::makeRequest
+            // If we are here then we didn't call
+            // a virtual version of this function
+            // so we will also schedule the callback
+            int wf_id = 0;
+            if (pkt->req->hasContextId()) {
+                wf_id = pkt->req->contextId();
+            }
+            insertKernel(wf_id, pkt);
+            newKernelEnds.push_back(wf_id);
+            if (!issueEvent.scheduled()) {
+                schedule(issueEvent, curTick());
+            }
+            return RequestStatus_Issued;
+        }
+    }
+
+    // If number of outstanding requests greater than the max allowed,
+    // return RequestStatus_BufferFull. This logic can be extended to
+    // support proper backpressure.
+    if (m_outstanding_count >= m_max_outstanding_requests) {
+        return RequestStatus_BufferFull;
+    }
+
+    RubyRequestType primary_type = RubyRequestType_NULL;
+    RubyRequestType secondary_type = RubyRequestType_NULL;
+
+    if (pkt->isLLSC()) {
+        //
+        // Alpha LL/SC instructions need to be handled carefully by the cache
+        // coherence protocol to ensure they follow the proper semantics. In
+        // particular, by identifying the operations as atomic, the protocol
+        // should understand that migratory sharing optimizations should not
+        // be performed (i.e. a load between the LL and SC should not steal
+        // away exclusive permission).
+        //
+        if (pkt->isWrite()) {
+            primary_type = RubyRequestType_Store_Conditional;
+        } else {
+            assert(pkt->isRead());
+            primary_type = RubyRequestType_Load_Linked;
+        }
+        secondary_type = RubyRequestType_ATOMIC;
+    } else if (pkt->req->isLockedRMW()) {
+        //
+        // x86 locked instructions are translated to store cache coherence
+        // requests because these requests should always be treated as read
+        // exclusive operations and should leverage any migratory sharing
+        // optimization built into the protocol.
+        //
+        if (pkt->isWrite()) {
+            primary_type = RubyRequestType_Locked_RMW_Write;
+        } else {
+            assert(pkt->isRead());
+            primary_type = RubyRequestType_Locked_RMW_Read;
+        }
+        secondary_type = RubyRequestType_ST;
+    } else if (pkt->isAtomicOp()) {
+        //
+        // GPU Atomic Operation
+        //
+        primary_type = RubyRequestType_ATOMIC;
+        secondary_type = RubyRequestType_ATOMIC;
+    } else {
+        if (pkt->isRead()) {
+            if (pkt->req->isInstFetch()) {
+                primary_type = secondary_type = RubyRequestType_IFETCH;
+            } else {
+#if THE_ISA == X86_ISA
+                uint32_t flags = pkt->req->getFlags();
+                bool storeCheck = flags &
+                        (TheISA::StoreCheck << TheISA::FlagShift);
+#else
+                bool storeCheck = false;
+#endif // X86_ISA
+                if (storeCheck) {
+                    primary_type = RubyRequestType_RMW_Read;
+                    secondary_type = RubyRequestType_ST;
+                } else {
+                    primary_type = secondary_type = RubyRequestType_LD;
+                }
+            }
+        } else if (pkt->isWrite()) {
+            //
+            // Note: M5 packets do not differentiate ST from RMW_Write
+            //
+            primary_type = secondary_type = RubyRequestType_ST;
+        } else if (pkt->isFlush()) {
+            primary_type = secondary_type = RubyRequestType_FLUSH;
+        } else if (pkt->req->isRelease() || pkt->req->isAcquire()) {
+            if (assumingRfOCoherence) {
+                // If we reached here, this request must be a memFence
+                // and the protocol implements RfO, the coalescer can
+                // assume sequentially consistency and schedule the callback
+                // immediately.
+                // Currently the code implements fence callbacks
+                // by reusing the mechanism for kernel completions.
+                // This should be fixed.
+                int wf_id = 0;
+                if (pkt->req->hasContextId()) {
+                    wf_id = pkt->req->contextId();
+                }
+                insertKernel(wf_id, pkt);
+                newKernelEnds.push_back(wf_id);
+                if (!issueEvent.scheduled()) {
+                    schedule(issueEvent, curTick());
+                }
+                return RequestStatus_Issued;
+            } else {
+                // If not RfO, return issued here and let the child coalescer
+                // take care of it.
+                return RequestStatus_Issued;
+            }
+        } else {
+            panic("Unsupported ruby packet type\n");
+        }
+    }
+
+    // Check if there is any pending request to this cache line from
+    // previous cycles.
+    // If there is a pending request, return aliased. Since coalescing
+    // across time is not permitted, aliased requests are not coalesced.
+    // If a request for this address has already been issued, we must block
+    RequestStatus status = getRequestStatus(pkt, primary_type);
+    if (status != RequestStatus_Ready)
+        return status;
+
+    Addr line_addr = makeLineAddress(pkt->getAddr());
+
+    // Check if this request can be coalesced with previous
+    // requests from this cycle.
+    if (!reqCoalescer.count(line_addr)) {
+        // This is the first access to this cache line.
+        // A new request to the memory subsystem has to be
+        // made in the next cycle for this cache line, so
+        // add this line addr to the "newRequests" queue
+        newRequests.push_back(line_addr);
+
+    // There was a request to this cache line in this cycle,
+    // let us see if we can coalesce this request with the previous
+    // requests from this cycle
+    } else if (primary_type !=
+               reqCoalescer[line_addr][0].second[PrimaryType]) {
+        // can't coalesce loads, stores and atomics!
+        return RequestStatus_Aliased;
+    } else if (pkt->req->isLockedRMW() ||
+               reqCoalescer[line_addr][0].first->req->isLockedRMW()) {
+        // can't coalesce locked accesses, but can coalesce atomics!
+        return RequestStatus_Aliased;
+    } else if (pkt->req->hasContextId() && pkt->req->isRelease() &&
+               pkt->req->contextId() !=
+               reqCoalescer[line_addr][0].first->req->contextId()) {
+        // can't coalesce releases from different wavefronts
+        return RequestStatus_Aliased;
+    }
+
+    // in addition to the packet, we need to save both request types
+    reqCoalescer[line_addr].push_back(
+            RequestDesc(pkt, std::vector<RubyRequestType>()) );
+    reqCoalescer[line_addr].back().second.push_back(primary_type);
+    reqCoalescer[line_addr].back().second.push_back(secondary_type);
+    if (!issueEvent.scheduled())
+        schedule(issueEvent, curTick());
+    // TODO: issue hardware prefetches here
+    return RequestStatus_Issued;
+}
+
+void
+GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
+{
+
+    int proc_id = -1;
+    if (pkt != NULL && pkt->req->hasContextId()) {
+        proc_id = pkt->req->contextId();
+    }
+
+    // If valid, copy the pc to the ruby request
+    Addr pc = 0;
+    if (pkt->req->hasPC()) {
+        pc = pkt->req->getPC();
+    }
+
+    // At the moment setting scopes only counts
+    // for GPU spill space accesses
+    // which is pkt->req->isStack()
+    // this scope is REPLACE since it
+    // does not need to be flushed at the end
+    // of a kernel Private and local may need
+    // to be visible at the end of the kernel
+    HSASegment accessSegment = reqSegmentToHSASegment(pkt->req);
+    HSAScope accessScope = reqScopeToHSAScope(pkt->req);
+
+    Addr line_addr = makeLineAddress(pkt->getAddr());
+
+    // Creating WriteMask that records written bytes
+    // and atomic operations. This enables partial writes
+    // and partial reads of those writes
+    DataBlock dataBlock;
+    dataBlock.clear();
+    uint32_t blockSize = RubySystem::getBlockSizeBytes();
+    std::vector<bool> accessMask(blockSize,false);
+    std::vector< std::pair<int,AtomicOpFunctor*> > atomicOps;
+    uint32_t tableSize = reqCoalescer[line_addr].size();
+    for (int i = 0; i < tableSize; i++) {
+        PacketPtr tmpPkt = reqCoalescer[line_addr][i].first;
+        uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
+        uint32_t tmpSize = tmpPkt->getSize();
+        if (tmpPkt->isAtomicOp()) {
+            std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
+                                                        tmpPkt->getAtomicOp());
+            atomicOps.push_back(tmpAtomicOp);
+        } else if(tmpPkt->isWrite()) {
+            dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
+                              tmpOffset, tmpSize);
+        }
+        for (int j = 0; j < tmpSize; j++) {
+            accessMask[tmpOffset + j] = true;
+        }
+    }
+    std::shared_ptr<RubyRequest> msg;
+    if (pkt->isAtomicOp()) {
+        msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
+                              pkt->getPtr<uint8_t>(),
+                              pkt->getSize(), pc, secondary_type,
+                              RubyAccessMode_Supervisor, pkt,
+                              PrefetchBit_No, proc_id, 100,
+                              blockSize, accessMask,
+                              dataBlock, atomicOps,
+                              accessScope, accessSegment);
+    } else {
+        msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
+                              pkt->getPtr<uint8_t>(),
+                              pkt->getSize(), pc, secondary_type,
+                              RubyAccessMode_Supervisor, pkt,
+                              PrefetchBit_No, proc_id, 100,
+                              blockSize, accessMask,
+                              dataBlock,
+                              accessScope, accessSegment);
+    }
+    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
+             curTick(), m_version, "Coal", "Begin", "", "",
+             printAddress(msg->getPhysicalAddress()),
+             RubyRequestType_to_string(secondary_type));
+
+    fatal_if(secondary_type == RubyRequestType_IFETCH,
+             "there should not be any I-Fetch requests in the GPU Coalescer");
+
+    // Send the message to the cache controller
+    fatal_if(m_data_cache_hit_latency == 0,
+             "should not have a latency of zero");
+
+    assert(m_mandatory_q_ptr);
+    m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+}
+
+template <class KEY, class VALUE>
+std::ostream &
+operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
+{
+    out << "[";
+    for (auto i = map.begin(); i != map.end(); ++i)
+        out << " " << i->first << "=" << i->second;
+    out << " ]";
+
+    return out;
+}
+
+void
+GPUCoalescer::print(ostream& out) const
+{
+    out << "[GPUCoalescer: " << m_version
+        << ", outstanding requests: " << m_outstanding_count
+        << ", read request table: " << m_readRequestTable
+        << ", write request table: " << m_writeRequestTable
+        << "]";
+}
+
+// this can be called from setState whenever coherence permissions are
+// upgraded when invoked, coherence violations will be checked for the
+// given block
+void
+GPUCoalescer::checkCoherence(Addr addr)
+{
+#ifdef CHECK_COHERENCE
+    m_ruby_system->checkGlobalCoherenceInvariant(addr);
+#endif
+}
+
+void
+GPUCoalescer::recordRequestType(SequencerRequestType requestType) {
+    DPRINTF(RubyStats, "Recorded statistic: %s\n",
+            SequencerRequestType_to_string(requestType));
+}
+
+GPUCoalescer::IssueEvent::IssueEvent(GPUCoalescer* _seq)
+    : Event(Progress_Event_Pri), seq(_seq)
+{
+}
+
+
+void
+GPUCoalescer::completeIssue()
+{
+    // newRequests has the cacheline addresses of all the
+    // requests which need to be issued to the memory subsystem
+    // in this cycle
+    int len = newRequests.size();
+    DPRINTF(GPUCoalescer, "Completing issue for %d new requests.\n", len);
+    for (int i = 0; i < len; ++i) {
+        // Get the requests from reqCoalescer table. Get only the
+        // first request for each cacheline, the remaining requests
+        // can be coalesced with the first request. So, only
+        // one request is issued per cacheline.
+        RequestDesc info = reqCoalescer[newRequests[i]][0];
+        PacketPtr pkt = info.first;
+        DPRINTF(GPUCoalescer, "Completing for newReq %d: paddr %#x\n",
+                i, pkt->req->getPaddr());
+        // Insert this request to the read/writeRequestTables. These tables
+        // are used to track aliased requests in makeRequest subroutine
+        bool found = insertRequest(pkt, info.second[PrimaryType]);
+
+        if (found) {
+            panic("GPUCoalescer::makeRequest should never be called if the "
+                  "request is already outstanding\n");
+        }
+
+        // Issue request to ruby subsystem
+        issueRequest(pkt, info.second[SecondaryType]);
+    }
+    newRequests.clear();
+
+    // have Kernel End releases been issued this cycle
+    len = newKernelEnds.size();
+    for (int i = 0; i < len; i++) {
+        kernelCallback(newKernelEnds[i]);
+    }
+    newKernelEnds.clear();
+}
+
+void
+GPUCoalescer::IssueEvent::process()
+{
+    seq->completeIssue();
+}
+
+const char *
+GPUCoalescer::IssueEvent::description() const
+{
+    return "Issue coalesced request";
+}
+
+void
+GPUCoalescer::evictionCallback(Addr address)
+{
+    ruby_eviction_callback(address);
+}
+
+void
+GPUCoalescer::kernelCallback(int wavefront_id)
+{
+    assert(kernelEndList.count(wavefront_id));
+
+    ruby_hit_callback(kernelEndList[wavefront_id]);
+
+    kernelEndList.erase(wavefront_id);
+}
+
+void
+GPUCoalescer::atomicCallback(Addr address,
+                             MachineType mach,
+                             const DataBlock& data)
+{
+    assert(address == makeLineAddress(address));
+
+    DPRINTF(GPUCoalescer, "atomic callback for address %#x\n", address);
+    assert(m_writeRequestTable.count(makeLineAddress(address)));
+
+    RequestTable::iterator i = m_writeRequestTable.find(address);
+    assert(i != m_writeRequestTable.end());
+    GPUCoalescerRequest* srequest = i->second;
+
+    m_writeRequestTable.erase(i);
+    markRemoved();
+
+    assert((srequest->m_type == RubyRequestType_ATOMIC) ||
+           (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
+           (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));
+
+
+    // Atomics don't write to cache, so there is no MRU update...
+
+    recordMissLatency(srequest, mach,
+                      srequest->issue_time, Cycles(0), Cycles(0), true, false);
+
+    PacketPtr pkt = srequest->pkt;
+    Addr request_address = pkt->getAddr();
+    Addr request_line_address = makeLineAddress(pkt->getAddr());
+
+    int len = reqCoalescer[request_line_address].size();
+    std::vector<PacketPtr> mylist;
+    for (int i = 0; i < len; ++i) {
+        PacketPtr pkt = reqCoalescer[request_line_address][i].first;
+        assert(srequest->m_type ==
+               reqCoalescer[request_line_address][i].second[PrimaryType]);
+        request_address = (pkt->getAddr());
+        request_line_address = makeLineAddress(request_address);
+        if (pkt->getPtr<uint8_t>() &&
+            srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {
+            /* atomics are done in memory, and return the data *before* the atomic op... */
+            memcpy(pkt->getPtr<uint8_t>(),
+                   data.getData(getOffset(request_address),
+                                pkt->getSize()),
+                   pkt->getSize());
+        } else {
+            DPRINTF(MemoryAccess,
+                    "WARNING.  Data not transfered from Ruby to M5 for type " \
+                    "%s\n",
+                    RubyRequestType_to_string(srequest->m_type));
+        }
+
+        // If using the RubyTester, update the RubyTester sender state's
+        // subBlock with the recieved data.  The tester will later access
+        // this state.
+        // Note: RubyPort will access it's sender state before the
+        // RubyTester.
+        if (m_usingRubyTester) {
+            RubyPort::SenderState *requestSenderState =
+                safe_cast<RubyPort::SenderState*>(pkt->senderState);
+            RubyTester::SenderState* testerSenderState =
+                safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
+            testerSenderState->subBlock.mergeFrom(data);
+        }
+
+        mylist.push_back(pkt);
+    }
+    delete srequest;
+    reqCoalescer.erase(request_line_address);
+    assert(!reqCoalescer.count(request_line_address));
+
+    completeHitCallback(mylist, len);
+}
+
+void
+GPUCoalescer::recordCPReadCallBack(MachineID myMachID, MachineID senderMachID)
+{
+    if(myMachID == senderMachID) {
+        CP_TCPLdHits++;
+    } else if(machineIDToMachineType(senderMachID) == MachineType_TCP) {
+        CP_TCPLdTransfers++;
+    } else if(machineIDToMachineType(senderMachID) == MachineType_TCC) {
+        CP_TCCLdHits++;
+    } else {
+        CP_LdMiss++;
+    }
+}
+
+void
+GPUCoalescer::recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID)
+{
+    if(myMachID == senderMachID) {
+        CP_TCPStHits++;
+    } else if(machineIDToMachineType(senderMachID) == MachineType_TCP) {
+        CP_TCPStTransfers++;
+    } else if(machineIDToMachineType(senderMachID) == MachineType_TCC) {
+        CP_TCCStHits++;
+    } else {
+        CP_StMiss++;
+    }
+}
+
+void
+GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist, int len)
+{
+    for (int i = 0; i < len; ++i) {
+        RubyPort::SenderState *ss =
+            safe_cast<RubyPort::SenderState *>(mylist[i]->senderState);
+        MemSlavePort *port = ss->port;
+        assert(port != NULL);
+
+        mylist[i]->senderState = ss->predecessor;
+        delete ss;
+        port->hitCallback(mylist[i]);
+        trySendRetries();
+    }
+
+    testDrainComplete();
+}
+
+PacketPtr
+GPUCoalescer::mapAddrToPkt(Addr address)
+{
+    RequestTable::iterator i = m_readRequestTable.find(address);
+    assert(i != m_readRequestTable.end());
+    GPUCoalescerRequest* request = i->second;
+    return request->pkt;
+}
+
+void
+GPUCoalescer::recordMissLatency(GPUCoalescerRequest* srequest,
+                                MachineType mach,
+                                Cycles initialRequestTime,
+                                Cycles forwardRequestTime,
+                                Cycles firstResponseTime,
+                                bool success, bool isRegion)
+{
+    RubyRequestType type = srequest->m_type;
+    Cycles issued_time = srequest->issue_time;
+    Cycles completion_time = curCycle();
+    assert(completion_time >= issued_time);
+    Cycles total_lat = completion_time - issued_time;
+
+    // cache stats (valid for RfO protocol only)
+    if (mach == MachineType_TCP) {
+        if (type == RubyRequestType_LD) {
+            GPU_TCPLdHits++;
+        } else {
+            GPU_TCPStHits++;
+        }
+    } else if (mach == MachineType_L1Cache_wCC) {
+        if (type == RubyRequestType_LD) {
+            GPU_TCPLdTransfers++;
+        } else {
+            GPU_TCPStTransfers++;
+        }
+    } else if (mach == MachineType_TCC) {
+        if (type == RubyRequestType_LD) {
+            GPU_TCCLdHits++;
+        } else {
+            GPU_TCCStHits++;
+        }
+    } else  {
+        if (type == RubyRequestType_LD) {
+            GPU_LdMiss++;
+        } else {
+            GPU_StMiss++;
+        }
+    }
+
+    // Profile all access latency, even zero latency accesses
+    m_latencyHist.sample(total_lat);
+    m_typeLatencyHist[type]->sample(total_lat);
+
+    // Profile the miss latency for all non-zero demand misses
+    if (total_lat != Cycles(0)) {
+        m_missLatencyHist.sample(total_lat);
+        m_missTypeLatencyHist[type]->sample(total_lat);
+
+        if (mach != MachineType_NUM) {
+            m_missMachLatencyHist[mach]->sample(total_lat);
+            m_missTypeMachLatencyHist[type][mach]->sample(total_lat);
+
+            if ((issued_time <= initialRequestTime) &&
+                (initialRequestTime <= forwardRequestTime) &&
+                (forwardRequestTime <= firstResponseTime) &&
+                (firstResponseTime <= completion_time)) {
+
+                m_IssueToInitialDelayHist[mach]->sample(
+                    initialRequestTime - issued_time);
+                m_InitialToForwardDelayHist[mach]->sample(
+                    forwardRequestTime - initialRequestTime);
+                m_ForwardToFirstResponseDelayHist[mach]->sample(
+                    firstResponseTime - forwardRequestTime);
+                m_FirstResponseToCompletionDelayHist[mach]->sample(
+                    completion_time - firstResponseTime);
+            }
+        }
+
+    }
+
+    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
+             curTick(), m_version, "Coal",
+             success ? "Done" : "SC_Failed", "", "",
+             printAddress(srequest->pkt->getAddr()), total_lat);
+}
+
+void
+GPUCoalescer::regStats()
+{
+    // These statistical variables are not for display.
+    // The profiler will collate these across different
+    // coalescers and display those collated statistics.
+    m_outstandReqHist.init(10);
+    m_latencyHist.init(10);
+    m_missLatencyHist.init(10);
+
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        m_typeLatencyHist.push_back(new Stats::Histogram());
+        m_typeLatencyHist[i]->init(10);
+
+        m_missTypeLatencyHist.push_back(new Stats::Histogram());
+        m_missTypeLatencyHist[i]->init(10);
+    }
+
+    for (int i = 0; i < MachineType_NUM; i++) {
+        m_missMachLatencyHist.push_back(new Stats::Histogram());
+        m_missMachLatencyHist[i]->init(10);
+
+        m_IssueToInitialDelayHist.push_back(new Stats::Histogram());
+        m_IssueToInitialDelayHist[i]->init(10);
+
+        m_InitialToForwardDelayHist.push_back(new Stats::Histogram());
+        m_InitialToForwardDelayHist[i]->init(10);
+
+        m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram());
+        m_ForwardToFirstResponseDelayHist[i]->init(10);
+
+        m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram());
+        m_FirstResponseToCompletionDelayHist[i]->init(10);
+    }
+
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
+
+        for (int j = 0; j < MachineType_NUM; j++) {
+            m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
+            m_missTypeMachLatencyHist[i][j]->init(10);
+        }
+    }
+
+    // GPU cache stats
+    GPU_TCPLdHits
+        .name(name() + ".gpu_tcp_ld_hits")
+        .desc("loads that hit in the TCP")
+        ;
+    GPU_TCPLdTransfers
+        .name(name() + ".gpu_tcp_ld_transfers")
+        .desc("TCP to TCP load transfers")
+        ;
+    GPU_TCCLdHits
+        .name(name() + ".gpu_tcc_ld_hits")
+        .desc("loads that hit in the TCC")
+        ;
+    GPU_LdMiss
+        .name(name() + ".gpu_ld_misses")
+        .desc("loads that miss in the GPU")
+        ;
+
+    GPU_TCPStHits
+        .name(name() + ".gpu_tcp_st_hits")
+        .desc("stores that hit in the TCP")
+        ;
+    GPU_TCPStTransfers
+        .name(name() + ".gpu_tcp_st_transfers")
+        .desc("TCP to TCP store transfers")
+        ;
+    GPU_TCCStHits
+        .name(name() + ".gpu_tcc_st_hits")
+        .desc("stores that hit in the TCC")
+        ;
+    GPU_StMiss
+        .name(name() + ".gpu_st_misses")
+        .desc("stores that miss in the GPU")
+        ;
+
+    // CP cache stats
+    CP_TCPLdHits
+        .name(name() + ".cp_tcp_ld_hits")
+        .desc("loads that hit in the TCP")
+        ;
+    CP_TCPLdTransfers
+        .name(name() + ".cp_tcp_ld_transfers")
+        .desc("TCP to TCP load transfers")
+        ;
+    CP_TCCLdHits
+        .name(name() + ".cp_tcc_ld_hits")
+        .desc("loads that hit in the TCC")
+        ;
+    CP_LdMiss
+        .name(name() + ".cp_ld_misses")
+        .desc("loads that miss in the GPU")
+        ;
+
+    CP_TCPStHits
+        .name(name() + ".cp_tcp_st_hits")
+        .desc("stores that hit in the TCP")
+        ;
+    CP_TCPStTransfers
+        .name(name() + ".cp_tcp_st_transfers")
+        .desc("TCP to TCP store transfers")
+        ;
+    CP_TCCStHits
+        .name(name() + ".cp_tcc_st_hits")
+        .desc("stores that hit in the TCC")
+        ;
+    CP_StMiss
+        .name(name() + ".cp_st_misses")
+        .desc("stores that miss in the GPU")
+        ;
+}
diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh
new file mode 100644
index 000000000..dbd47059c
--- /dev/null
+++ b/src/mem/ruby/system/GPUCoalescer.hh
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+#define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+
+#include <iostream>
+#include <unordered_map>
+
+#include "base/statistics.hh"
+#include "mem/protocol/HSAScope.hh"
+#include "mem/protocol/HSASegment.hh"
+#include "mem/protocol/PrefetchBit.hh"
+#include "mem/protocol/RubyAccessMode.hh"
+#include "mem/protocol/RubyRequestType.hh"
+#include "mem/protocol/SequencerRequestType.hh"
+#include "mem/request.hh"
+#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/Consumer.hh"
+#include "mem/ruby/system/RubyPort.hh"
+
+class DataBlock;
+class CacheMsg;
+class MachineID;
+class CacheMemory;
+
+class RubyGPUCoalescerParams;
+
+HSAScope reqScopeToHSAScope(Request* req);
+HSASegment reqSegmentToHSASegment(Request* req);
+
+struct GPUCoalescerRequest
+{
+    PacketPtr pkt;
+    RubyRequestType m_type;
+    Cycles issue_time;
+
+    GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type,
+                        Cycles _issue_time)
+        : pkt(_pkt), m_type(_m_type), issue_time(_issue_time)
+    {}
+};
+
+std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj);
+
+class GPUCoalescer : public RubyPort
+{
+  public:
+    typedef RubyGPUCoalescerParams Params;
+    GPUCoalescer(const Params *);
+    ~GPUCoalescer();
+
+    // Public Methods
+    void wakeup(); // Used only for deadlock detection
+
+    void printProgress(std::ostream& out) const;
+    void resetStats();
+    void collateStats();
+    void regStats();
+
+    void writeCallback(Addr address, DataBlock& data);
+
+    void writeCallback(Addr address,
+                       MachineType mach,
+                       DataBlock& data);
+
+    void writeCallback(Addr address,
+                       MachineType mach,
+                       DataBlock& data,
+                       Cycles initialRequestTime,
+                       Cycles forwardRequestTime,
+                       Cycles firstResponseTime,
+                       bool isRegion);
+
+    void writeCallback(Addr address,
+                       MachineType mach,
+                       DataBlock& data,
+                       Cycles initialRequestTime,
+                       Cycles forwardRequestTime,
+                       Cycles firstResponseTime);
+
+    void readCallback(Addr address, DataBlock& data);
+
+    void readCallback(Addr address,
+                      MachineType mach,
+                      DataBlock& data);
+
+    void readCallback(Addr address,
+                      MachineType mach,
+                      DataBlock& data,
+                      Cycles initialRequestTime,
+                      Cycles forwardRequestTime,
+                      Cycles firstResponseTime);
+
+    void readCallback(Addr address,
+                      MachineType mach,
+                      DataBlock& data,
+                      Cycles initialRequestTime,
+                      Cycles forwardRequestTime,
+                      Cycles firstResponseTime,
+                      bool isRegion);
+    /* atomics need their own callback because the data
+       might be const coming from SLICC */
+    void atomicCallback(Addr address,
+                        MachineType mach,
+                        const DataBlock& data);
+
+    void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID);
+    void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID);
+
+    // Alternate implementations in VIPER Coalescer
+    virtual RequestStatus makeRequest(PacketPtr pkt);
+
+    int outstandingCount() const { return m_outstanding_count; }
+
+    bool
+    isDeadlockEventScheduled() const
+    {
+        return deadlockCheckEvent.scheduled();
+    }
+
+    void
+    descheduleDeadlockEvent()
+    {
+        deschedule(deadlockCheckEvent);
+    }
+
+    bool empty() const;
+
+    void print(std::ostream& out) const;
+    void checkCoherence(Addr address);
+
+    void markRemoved();
+    void removeRequest(GPUCoalescerRequest* request);
+    void evictionCallback(Addr address);
+    void completeIssue();
+
+    void insertKernel(int wavefront_id, PacketPtr pkt);
+
+    void recordRequestType(SequencerRequestType requestType);
+    Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
+
+    Stats::Histogram& getLatencyHist() { return m_latencyHist; }
+    Stats::Histogram& getTypeLatencyHist(uint32_t t)
+    { return *m_typeLatencyHist[t]; }
+
+    Stats::Histogram& getMissLatencyHist()
+    { return m_missLatencyHist; }
+    Stats::Histogram& getMissTypeLatencyHist(uint32_t t)
+    { return *m_missTypeLatencyHist[t]; }
+
+    Stats::Histogram& getMissMachLatencyHist(uint32_t t) const
+    { return *m_missMachLatencyHist[t]; }
+
+    Stats::Histogram&
+    getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
+    { return *m_missTypeMachLatencyHist[r][t]; }
+
+    Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const
+    { return *m_IssueToInitialDelayHist[t]; }
+
+    Stats::Histogram&
+    getInitialToForwardDelayHist(const MachineType t) const
+    { return *m_InitialToForwardDelayHist[t]; }
+
+    Stats::Histogram&
+    getForwardRequestToFirstResponseHist(const MachineType t) const
+    { return *m_ForwardToFirstResponseDelayHist[t]; }
+
+    Stats::Histogram&
+    getFirstResponseToCompletionDelayHist(const MachineType t) const
+    { return *m_FirstResponseToCompletionDelayHist[t]; }
+
+  // Changed to protected to enable inheritance by VIPER Coalescer
+  protected:
+    bool tryCacheAccess(Addr addr, RubyRequestType type,
+                        Addr pc, RubyAccessMode access_mode,
+                        int size, DataBlock*& data_ptr);
+    // Alternate implementations in VIPER Coalescer
+    virtual void issueRequest(PacketPtr pkt, RubyRequestType type);
+
+    void kernelCallback(int wavfront_id);
+
+    void hitCallback(GPUCoalescerRequest* request,
+                     MachineType mach,
+                     DataBlock& data,
+                     bool success,
+                     Cycles initialRequestTime,
+                     Cycles forwardRequestTime,
+                     Cycles firstResponseTime,
+                     bool isRegion);
+    void recordMissLatency(GPUCoalescerRequest* request,
+                           MachineType mach,
+                           Cycles initialRequestTime,
+                           Cycles forwardRequestTime,
+                           Cycles firstResponseTime,
+                           bool success, bool isRegion);
+    void completeHitCallback(std::vector<PacketPtr> & mylist, int len);
+    PacketPtr mapAddrToPkt(Addr address);
+
+
+    RequestStatus getRequestStatus(PacketPtr pkt,
+                                   RubyRequestType request_type);
+    bool insertRequest(PacketPtr pkt, RubyRequestType request_type);
+
+    bool handleLlsc(Addr address, GPUCoalescerRequest* request);
+
+    // Private copy constructor and assignment operator
+    GPUCoalescer(const GPUCoalescer& obj);
+    GPUCoalescer& operator=(const GPUCoalescer& obj);
+
+    class IssueEvent : public Event
+    {
+      private:
+        GPUCoalescer *seq;
+      public:
+        IssueEvent(GPUCoalescer *_seq);
+        void process();
+        const char *description() const;
+    };
+
+    IssueEvent issueEvent;
+
+
+  // Changed to protected to enable inheritance by VIPER Coalescer
+  protected:
+    int m_max_outstanding_requests;
+    int m_deadlock_threshold;
+
+    CacheMemory* m_dataCache_ptr;
+    CacheMemory* m_instCache_ptr;
+
+    // The cache access latency for this GPU data cache. This is assessed at the
+    // beginning of each access. This should be very similar to the
+    // implementation in Sequencer() as this is very much like a Sequencer
+    Cycles m_data_cache_hit_latency;
+
+    // We need to track both the primary and secondary request types.
+    // The secondary request type comprises a subset of RubyRequestTypes that
+    // are understood by the L1 Controller. A primary request type can be any
+    // RubyRequestType.
+    enum {PrimaryType, SecondaryType};
+    typedef std::pair<PacketPtr, std::vector<RubyRequestType> > RequestDesc;
+    typedef std::unordered_map<Addr, std::vector<RequestDesc> > CoalescingTable;
+    CoalescingTable reqCoalescer;
+    std::vector<Addr> newRequests;
+
+    typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable;
+    RequestTable m_writeRequestTable;
+    RequestTable m_readRequestTable;
+    // Global outstanding request count, across all request tables
+    int m_outstanding_count;
+    bool m_deadlock_check_scheduled;
+    std::unordered_map<int, PacketPtr> kernelEndList;
+    std::vector<int> newKernelEnds;
+
+    int m_store_waiting_on_load_cycles;
+    int m_store_waiting_on_store_cycles;
+    int m_load_waiting_on_store_cycles;
+    int m_load_waiting_on_load_cycles;
+
+    bool m_usingNetworkTester;
+
+    class GPUCoalescerWakeupEvent : public Event
+    {
+      private:
+        GPUCoalescer *m_GPUCoalescer_ptr;
+
+      public:
+        GPUCoalescerWakeupEvent(GPUCoalescer *_seq) :
+            m_GPUCoalescer_ptr(_seq) {}
+        void process() { m_GPUCoalescer_ptr->wakeup(); }
+        const char *description() const
+        {
+            return "GPUCoalescer deadlock check";
+        }
+    };
+
+    GPUCoalescerWakeupEvent deadlockCheckEvent;
+    bool assumingRfOCoherence;
+
+    // m5 style stats for TCP hit/miss counts
+    Stats::Scalar GPU_TCPLdHits;
+    Stats::Scalar GPU_TCPLdTransfers;
+    Stats::Scalar GPU_TCCLdHits;
+    Stats::Scalar GPU_LdMiss;
+
+    Stats::Scalar GPU_TCPStHits;
+    Stats::Scalar GPU_TCPStTransfers;
+    Stats::Scalar GPU_TCCStHits;
+    Stats::Scalar GPU_StMiss;
+
+    Stats::Scalar CP_TCPLdHits;
+    Stats::Scalar CP_TCPLdTransfers;
+    Stats::Scalar CP_TCCLdHits;
+    Stats::Scalar CP_LdMiss;
+
+    Stats::Scalar CP_TCPStHits;
+    Stats::Scalar CP_TCPStTransfers;
+    Stats::Scalar CP_TCCStHits;
+    Stats::Scalar CP_StMiss;
+
+    //! Histogram for number of outstanding requests per cycle.
+    Stats::Histogram m_outstandReqHist;
+
+    //! Histogram for holding latency profile of all requests.
+    Stats::Histogram m_latencyHist;
+    std::vector<Stats::Histogram *> m_typeLatencyHist;
+
+    //! Histogram for holding latency profile of all requests that
+    //! miss in the controller connected to this sequencer.
+    Stats::Histogram m_missLatencyHist;
+    std::vector<Stats::Histogram *> m_missTypeLatencyHist;
+
+    //! Histograms for profiling the latencies for requests that
+    //! required external messages.
+    std::vector<Stats::Histogram *> m_missMachLatencyHist;
+    std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist;
+
+    //! Histograms for recording the breakdown of miss latency
+    std::vector<Stats::Histogram *> m_IssueToInitialDelayHist;
+    std::vector<Stats::Histogram *> m_InitialToForwardDelayHist;
+    std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist;
+    std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist;
+};
+
+inline std::ostream&
+operator<<(std::ostream& out, const GPUCoalescer& obj)
+{
+    obj.print(out);
+    out << std::flush;
+    return out;
+}
+
+#endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+
diff --git a/src/mem/ruby/system/GPUCoalescer.py b/src/mem/ruby/system/GPUCoalescer.py
new file mode 100644
index 000000000..0c19f875d
--- /dev/null
+++ b/src/mem/ruby/system/GPUCoalescer.py
@@ -0,0 +1,48 @@
+#  Copyright (c) 2015 Advanced Micro Devices, Inc.
+#  All rights reserved.
+#
+#  For use for simulation and test purposes only
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are met:
+#
+#  1. Redistributions of source code must retain the above copyright notice,
+#  this list of conditions and the following disclaimer.
+#
+#  2. Redistributions in binary form must reproduce the above copyright notice,
+#  this list of conditions and the following disclaimer in the documentation
+#  and/or other materials provided with the distribution.
+#
+#  3. Neither the name of the copyright holder nor the names of its contributors
+#  may be used to endorse or promote products derived from this software
+#  without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#  POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Steve Reinhardt
+#          Brad Beckmann
+
+from m5.params import *
+from m5.proxy import *
+from Sequencer import *
+
+class RubyGPUCoalescer(RubySequencer):
+   type = 'RubyGPUCoalescer'
+   cxx_class = 'GPUCoalescer'
+   cxx_header = "mem/ruby/system/GPUCoalescer.hh"
+
+   # max_outstanding_requests = (wave front slots) x (wave front size)
+   max_outstanding_requests = Param.Int(40*64,
+                                "max requests (incl. prefetches) outstanding")
+   assume_rfo = Param.Bool(True, "assume protocol implementes Read for "
+                           "Ownership coherence");
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 5a5f528bb..bf4002126 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -60,7 +60,8 @@ RubyPort::RubyPort(const Params *p)
       memSlavePort(csprintf("%s-mem-slave-port", name()), this,
                    p->ruby_system->getAccessBackingStore(), -1,
                    p->no_retry_on_stall),
-      gotAddrRanges(p->port_master_connection_count)
+      gotAddrRanges(p->port_master_connection_count),
+      m_isCPUSequencer(p->is_cpu_sequencer)
 {
     assert(m_version != -1);
 
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index 07e0fde5a..6bd92b654 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -167,6 +167,8 @@ class RubyPort : public MemObject
     uint32_t getId() { return m_version; }
     DrainState drain() override;
 
+    bool isCPUSequencer() { return m_isCPUSequencer; }
+
   protected:
     void trySendRetries();
     void ruby_hit_callback(PacketPtr pkt);
@@ -218,6 +220,8 @@ class RubyPort : public MemObject
     // that should be called when the Sequencer becomes available after a stall.
     //
     std::vector<MemSlavePort *> retryList;
+
+    bool m_isCPUSequencer;
 };
 
 #endif // __MEM_RUBY_SYSTEM_RUBYPORT_HH__
diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc
index 1ecd2e098..e1717e519 100644
--- a/src/mem/ruby/system/RubySystem.cc
+++ b/src/mem/ruby/system/RubySystem.cc
@@ -107,7 +107,7 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
     Sequencer* sequencer_ptr = NULL;
 
     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
-        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
+        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getCPUSequencer());
         if (sequencer_ptr == NULL) {
             sequencer_ptr = sequencer_map[cntrl];
         }
diff --git a/src/mem/ruby/system/SConscript b/src/mem/ruby/system/SConscript
index 8c5077362..b67311bca 100644
--- a/src/mem/ruby/system/SConscript
+++ b/src/mem/ruby/system/SConscript
@@ -33,12 +33,22 @@ Import('*')
 if env['PROTOCOL'] == 'None':
     Return()
 
+if env['BUILD_GPU']:
+    SimObject('GPUCoalescer.py')
 SimObject('RubySystem.py')
 SimObject('Sequencer.py')
+SimObject('WeightedLRUReplacementPolicy.py')
+if env['BUILD_GPU']:
+    SimObject('VIPERCoalescer.py')
 
 Source('CacheRecorder.cc')
 Source('DMASequencer.cc')
+if env['BUILD_GPU']:
+    Source('GPUCoalescer.cc')
 Source('RubyPort.cc')
 Source('RubyPortProxy.cc')
 Source('RubySystem.cc')
 Source('Sequencer.cc')
+if env['BUILD_GPU']:
+    Source('VIPERCoalescer.cc')
+Source('WeightedLRUPolicy.cc')
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 50418c700..c2727b41d 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -63,6 +63,7 @@ Sequencer::Sequencer(const Params *p)
     m_max_outstanding_requests = p->max_outstanding_requests;
     m_deadlock_threshold = p->deadlock_threshold;
 
+    m_coreId = p->coreid; // for tracking the two CorePair sequencers
     assert(m_max_outstanding_requests > 0);
     assert(m_deadlock_threshold > 0);
     assert(m_instCache_ptr != NULL);
@@ -593,6 +594,8 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
     ContextID proc_id = pkt->req->hasContextId() ?
         pkt->req->contextId() : InvalidContextID;
 
+    ContextID core_id = coreId();
+
     // If valid, copy the pc to the ruby request
     Addr pc = 0;
     if (pkt->req->hasPC()) {
@@ -607,7 +610,7 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
                                       nullptr : pkt->getPtr<uint8_t>(),
                                       pkt->getSize(), pc, secondary_type,
                                       RubyAccessMode_Supervisor, pkt,
-                                      PrefetchBit_No, proc_id);
+                                      PrefetchBit_No, proc_id, core_id);
 
     DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\n",
             curTick(), m_version, "Seq", "Begin", "", "",
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index 47af7ea1e..2a2f49587 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -99,6 +99,7 @@ class Sequencer : public RubyPort
     void markRemoved();
     void evictionCallback(Addr address);
     void invalidateSC(Addr address);
+    int coreId() const { return m_coreId; }
 
     void recordRequestType(SequencerRequestType requestType);
     Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
@@ -198,6 +199,8 @@ class Sequencer : public RubyPort
     Stats::Scalar m_load_waiting_on_store;
     Stats::Scalar m_load_waiting_on_load;
 
+    int m_coreId;
+
     bool m_usingNetworkTester;
 
     //! Histogram for number of outstanding requests per cycle.
diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py
index 7c90eb29c..d6ee0aa2f 100644
--- a/src/mem/ruby/system/Sequencer.py
+++ b/src/mem/ruby/system/Sequencer.py
@@ -32,54 +32,58 @@ from m5.proxy import *
 from MemObject import MemObject
 
 class RubyPort(MemObject):
-    type = 'RubyPort'
-    abstract = True
-    cxx_header = "mem/ruby/system/RubyPort.hh"
-    version = Param.Int(0, "")
+   type = 'RubyPort'
+   abstract = True
+   cxx_header = "mem/ruby/system/RubyPort.hh"
+   version = Param.Int(0, "")
 
-    slave = VectorSlavePort("CPU slave port")
-    master = VectorMasterPort("CPU master port")
-    pio_master_port = MasterPort("Ruby mem master port")
-    mem_master_port = MasterPort("Ruby mem master port")
-    pio_slave_port = SlavePort("Ruby pio slave port")
-    mem_slave_port = SlavePort("Ruby memory port")
+   slave = VectorSlavePort("CPU slave port")
+   master = VectorMasterPort("CPU master port")
+   pio_master_port = MasterPort("Ruby mem master port")
+   mem_master_port = MasterPort("Ruby mem master port")
+   pio_slave_port = SlavePort("Ruby pio slave port")
+   mem_slave_port = SlavePort("Ruby memory port")
 
-    using_ruby_tester = Param.Bool(False, "")
-    no_retry_on_stall = Param.Bool(False, "")
-    ruby_system = Param.RubySystem(Parent.any, "")
-    system = Param.System(Parent.any, "system object")
-    support_data_reqs = Param.Bool(True, "data cache requests supported")
-    support_inst_reqs = Param.Bool(True, "inst cache requests supported")
+   using_ruby_tester = Param.Bool(False, "")
+   no_retry_on_stall = Param.Bool(False, "")
+   ruby_system = Param.RubySystem(Parent.any, "")
+   system = Param.System(Parent.any, "system object")
+   support_data_reqs = Param.Bool(True, "data cache requests supported")
+   support_inst_reqs = Param.Bool(True, "inst cache requests supported")
+   is_cpu_sequencer = Param.Bool(True, "connected to a cpu")
 
 class RubyPortProxy(RubyPort):
-    type = 'RubyPortProxy'
-    cxx_header = "mem/ruby/system/RubyPortProxy.hh"
+   type = 'RubyPortProxy'
+   cxx_header = "mem/ruby/system/RubyPortProxy.hh"
 
 class RubySequencer(RubyPort):
-    type = 'RubySequencer'
-    cxx_class = 'Sequencer'
-    cxx_header = "mem/ruby/system/Sequencer.hh"
+   type = 'RubySequencer'
+   cxx_class = 'Sequencer'
+   cxx_header = "mem/ruby/system/Sequencer.hh"
 
-    icache = Param.RubyCache("")
-    dcache = Param.RubyCache("")
-    # Cache latencies currently assessed at the beginning of each access
-    # NOTE: Setting these values to a value greater than one will result in
-    # O3 CPU pipeline bubbles and negatively impact performance
-    # TODO: Latencies should be migrated into each top-level cache controller
-    icache_hit_latency = Param.Cycles(1, "Inst cache hit latency")
-    dcache_hit_latency = Param.Cycles(1, "Data cache hit latency")
-    max_outstanding_requests = Param.Int(16,
-        "max requests (incl. prefetches) outstanding")
-    deadlock_threshold = Param.Cycles(500000,
-        "max outstanding cycles for a request before deadlock/livelock declared")
-    using_network_tester = Param.Bool(False, "")
+   icache = Param.RubyCache("")
+   dcache = Param.RubyCache("")
+   # Cache latencies currently assessed at the beginning of each access
+   # NOTE: Setting these values to a value greater than one will result in
+   # O3 CPU pipeline bubbles and negatively impact performance
+   # TODO: Latencies should be migrated into each top-level cache controller
+   icache_hit_latency = Param.Cycles(1, "Inst cache hit latency")
+   dcache_hit_latency = Param.Cycles(1, "Data cache hit latency")
+   max_outstanding_requests = Param.Int(16,
+       "max requests (incl. prefetches) outstanding")
+   deadlock_threshold = Param.Cycles(500000,
+       "max outstanding cycles for a request before deadlock/livelock declared")
+   using_network_tester = Param.Bool(False, "")
+   # id used by protocols that support multiple sequencers per controller
+   # 99 is the dummy default value
+   coreid = Param.Int(99, "CorePair core id")
 
 class DMASequencer(MemObject):
-    type = 'DMASequencer'
-    cxx_header = "mem/ruby/system/DMASequencer.hh"
+   type = 'DMASequencer'
+   cxx_header = "mem/ruby/system/DMASequencer.hh"
 
-    version = Param.Int(0, "")
-    slave = SlavePort("Device slave port")
-    using_ruby_tester = Param.Bool(False, "")
-    ruby_system = Param.RubySystem(Parent.any, "")
-    system = Param.System(Parent.any, "system object")
+   version = Param.Int(0, "")
+   slave = SlavePort("Device slave port")
+   using_ruby_tester = Param.Bool(False, "")
+   ruby_system = Param.RubySystem(Parent.any, "")
+   system = Param.System(Parent.any, "system object")
diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc
new file mode 100644
index 000000000..ca91f2723
--- /dev/null
+++ b/src/mem/ruby/system/VIPERCoalescer.cc
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#include "base/misc.hh"
+#include "base/str.hh"
+#include "config/the_isa.hh"
+
+#if THE_ISA == X86_ISA
+#include "arch/x86/insts/microldstop.hh"
+
+#endif // X86_ISA
+#include "mem/ruby/system/VIPERCoalescer.hh"
+
+#include "cpu/testers/rubytest/RubyTester.hh"
+#include "debug/GPUCoalescer.hh"
+#include "debug/MemoryAccess.hh"
+#include "mem/packet.hh"
+#include "mem/ruby/common/SubBlock.hh"
+#include "mem/ruby/network/MessageBuffer.hh"
+#include "mem/ruby/profiler/Profiler.hh"
+#include "mem/ruby/slicc_interface/AbstractController.hh"
+#include "mem/ruby/slicc_interface/RubyRequest.hh"
+#include "mem/ruby/structures/CacheMemory.hh"
+#include "mem/ruby/system/GPUCoalescer.hh"
+#include "mem/ruby/system/RubySystem.hh"
+#include "params/VIPERCoalescer.hh"
+
+using namespace std;
+
+VIPERCoalescer *
+VIPERCoalescerParams::create()
+{
+    return new VIPERCoalescer(this);
+}
+
+VIPERCoalescer::VIPERCoalescer(const Params *p)
+    : GPUCoalescer(p)
+{
+    m_max_wb_per_cycle=p->max_wb_per_cycle;
+    m_max_inv_per_cycle=p->max_inv_per_cycle;
+    m_outstanding_inv = 0;
+    m_outstanding_wb = 0;
+}
+
+VIPERCoalescer::~VIPERCoalescer()
+{
+}
+
+// Analyzes the packet to see if this request can be coalesced.
+// If request can be coalesced, this request is added to the reqCoalescer table
+// and makeRequest returns RequestStatus_Issued;
+// If this is the first request to a cacheline, request is added to both
+// newRequests queue and to the reqCoalescer table; makeRequest
+// returns RequestStatus_Issued.
+// If there is a pending request to this cacheline and this request
+// can't be coalesced, RequestStatus_Aliased is returned and
+// the packet needs to be reissued.
+RequestStatus
+VIPERCoalescer::makeRequest(PacketPtr pkt)
+{
+    if (m_outstanding_wb | m_outstanding_inv) {
+        DPRINTF(GPUCoalescer,
+                "There are %d Writebacks and %d Invalidatons\n",
+                m_outstanding_wb, m_outstanding_inv);
+    }
+    // Are we in the middle of a release
+    if ((m_outstanding_wb) > 0) {
+        if (pkt->req->isKernel()) {
+            // Everythign is fine
+            // Barriers and Kernel End scan coalesce
+            // If it is a Kerenl Begin flush the cache
+            if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
+                invL1();
+            }
+
+            if (pkt->req->isRelease()) {
+                insertKernel(pkt->req->contextId(), pkt);
+            }
+
+            return RequestStatus_Issued;
+        }
+//        return RequestStatus_Aliased;
+    } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
+        // Flush Dirty Data on Kernel End
+        // isKernel + isRelease
+        insertKernel(pkt->req->contextId(), pkt);
+        wbL1();
+        if(m_outstanding_wb == 0) {
+            for (auto it =  kernelEndList.begin(); it != kernelEndList.end(); it++) {
+                newKernelEnds.push_back(it->first);
+            }
+            completeIssue();
+        }
+        return RequestStatus_Issued;
+    }
+    RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
+    if (requestStatus!=RequestStatus_Issued) {
+        // Request not isssued
+        // enqueue Retry
+        DPRINTF(GPUCoalescer, "Request not issued by GPUCoaleser\n");
+        return requestStatus;
+    } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
+        // Invalidate clean Data on Kernel Begin
+        // isKernel + isAcquire
+        invL1();
+    } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
+        // Deschedule the AtomicAcqRel and
+        // Flush and Invalidate the L1 cache
+        invwbL1();
+        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
+            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
+            deschedule(issueEvent);
+        }
+    } else if (pkt->req->isRelease()) {
+        // Deschedule the StoreRel and
+        // Flush the L1 cache
+        wbL1();
+        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
+            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
+            deschedule(issueEvent);
+        }
+    } else if (pkt->req->isAcquire()) {
+        // LoadAcq or AtomicAcq
+        // Invalidate the L1 cache
+        invL1();
+    }
+    // Request was successful
+    if (m_outstanding_wb == 0) {
+        if (!issueEvent.scheduled()) {
+            DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
+            schedule(issueEvent, curTick());
+        }
+    }
+    return RequestStatus_Issued;
+}
+
+void
+VIPERCoalescer::wbCallback(Addr addr)
+{
+    m_outstanding_wb--;
+    // if L1 Flush Complete
+    // attemnpt to schedule issueEvent
+    assert(((int) m_outstanding_wb) >= 0);
+    if (m_outstanding_wb == 0) {
+        for (auto it =  kernelEndList.begin(); it != kernelEndList.end(); it++) {
+            newKernelEnds.push_back(it->first);
+        }
+        completeIssue();
+    }
+    trySendRetries();
+}
+
+void
+VIPERCoalescer::invCallback(Addr addr)
+{
+    m_outstanding_inv--;
+    // if L1 Flush Complete
+    // attemnpt to schedule issueEvent
+    // This probably won't happen, since
+    // we dont wait on cache invalidations
+    if (m_outstanding_wb == 0) {
+        for (auto it =  kernelEndList.begin(); it != kernelEndList.end(); it++) {
+            newKernelEnds.push_back(it->first);
+        }
+        completeIssue();
+    }
+    trySendRetries();
+}
+
+/**
+  * Invalidate L1 cache (Acquire)
+  */
+void
+VIPERCoalescer::invL1()
+{
+    int size = m_dataCache_ptr->getNumBlocks();
+    DPRINTF(GPUCoalescer,
+            "There are %d Invalidations outstanding before Cache Walk\n",
+            m_outstanding_inv);
+    // Walk the cache
+    for (int i = 0; i < size; i++) {
+        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
+        // Evict Read-only data
+        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
+            clockEdge(), addr, (uint8_t*) 0, 0, 0,
+            RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
+            nullptr);
+        assert(m_mandatory_q_ptr != NULL);
+        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+        m_outstanding_inv++;
+    }
+    DPRINTF(GPUCoalescer,
+            "There are %d Invalidatons outstanding after Cache Walk\n",
+            m_outstanding_inv);
+}
+
+/**
+  * Writeback L1 cache (Release)
+  */
+void
+VIPERCoalescer::wbL1()
+{
+    int size = m_dataCache_ptr->getNumBlocks();
+    DPRINTF(GPUCoalescer,
+            "There are %d Writebacks outstanding before Cache Walk\n",
+            m_outstanding_wb);
+    // Walk the cache
+    for (int i = 0; i < size; i++) {
+        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
+        // Write dirty data back
+        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
+            clockEdge(), addr, (uint8_t*) 0, 0, 0,
+            RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
+            nullptr);
+        assert(m_mandatory_q_ptr != NULL);
+        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+        m_outstanding_wb++;
+    }
+    DPRINTF(GPUCoalescer,
+            "There are %d Writebacks outstanding after Cache Walk\n",
+            m_outstanding_wb);
+}
+
+/**
+  * Invalidate and Writeback L1 cache (Acquire&Release)
+  */
+void
+VIPERCoalescer::invwbL1()
+{
+    int size = m_dataCache_ptr->getNumBlocks();
+    // Walk the cache
+    for(int i = 0; i < size; i++) {
+        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
+        // Evict Read-only data
+        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
+            clockEdge(), addr, (uint8_t*) 0, 0, 0,
+            RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
+            nullptr);
+        assert(m_mandatory_q_ptr != NULL);
+        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+        m_outstanding_inv++;
+    }
+    // Walk the cache
+    for(int i = 0; i< size; i++) {
+        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
+        // Write dirty data back
+        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
+            clockEdge(), addr, (uint8_t*) 0, 0, 0,
+            RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
+            nullptr);
+        assert(m_mandatory_q_ptr != NULL);
+        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+        m_outstanding_wb++;
+    }
+}
diff --git a/src/mem/ruby/system/VIPERCoalescer.hh b/src/mem/ruby/system/VIPERCoalescer.hh
new file mode 100644
index 000000000..af6e44e7f
--- /dev/null
+++ b/src/mem/ruby/system/VIPERCoalescer.hh
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#ifndef __MEM_RUBY_SYSTEM_VI_COALESCER_HH__
+#define __MEM_RUBY_SYSTEM_VI_COALESCER_HH__
+
+#include <iostream>
+
+#include "mem/protocol/PrefetchBit.hh"
+#include "mem/protocol/RubyAccessMode.hh"
+#include "mem/protocol/RubyRequestType.hh"
+#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/Consumer.hh"
+#include "mem/ruby/system/GPUCoalescer.hh"
+#include "mem/ruby/system/RubyPort.hh"
+
+class DataBlock;
+class CacheMsg;
+class MachineID;
+class CacheMemory;
+
+class VIPERCoalescerParams;
+
+class VIPERCoalescer : public GPUCoalescer
+{
+  public:
+    typedef VIPERCoalescerParams Params;
+    VIPERCoalescer(const Params *);
+    ~VIPERCoalescer();
+    void wbCallback(Addr address);
+    void invCallback(Addr address);
+    RequestStatus makeRequest(PacketPtr pkt);
+  private:
+    void invL1();
+    void wbL1();
+    void invwbL1();
+    uint64_t m_outstanding_inv;
+    uint64_t m_outstanding_wb;
+    uint64_t m_max_inv_per_cycle;
+    uint64_t m_max_wb_per_cycle;
+};
+#endif // __MEM_RUBY_SYSTEM_VI_COALESCER_HH__
+
diff --git a/src/mem/ruby/system/VIPERCoalescer.py b/src/mem/ruby/system/VIPERCoalescer.py
new file mode 100644
index 000000000..05c74386f
--- /dev/null
+++ b/src/mem/ruby/system/VIPERCoalescer.py
@@ -0,0 +1,45 @@
+#  Copyright (c) 2015 Advanced Micro Devices, Inc.
+#  All rights reserved.
+#
+#  For use for simulation and test purposes only
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are met:
+#
+#  1. Redistributions of source code must retain the above copyright notice,
+#  this list of conditions and the following disclaimer.
+#
+#  2. Redistributions in binary form must reproduce the above copyright notice,
+#  this list of conditions and the following disclaimer in the documentation
+#  and/or other materials provided with the distribution.
+#
+#  3. Neither the name of the copyright holder nor the names of its contributors
+#  may be used to endorse or promote products derived from this software
+#  without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#  POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Steve Reinhardt
+#          Brad Beckmann
+
+from m5.params import *
+from m5.proxy import *
+from GPUCoalescer import *
+
+class VIPERCoalescer(RubyGPUCoalescer):
+    type = 'VIPERCoalescer'
+    cxx_class = 'VIPERCoalescer'
+    cxx_header = "mem/ruby/system/VIPERCoalescer.hh"
+    max_inv_per_cycle = Param.Int(32, "max invalidations per cycle")
+    max_wb_per_cycle = Param.Int(32, "max writebacks per cycle")
+    assume_rfo = False
diff --git a/src/mem/ruby/system/WeightedLRUPolicy.cc b/src/mem/ruby/system/WeightedLRUPolicy.cc
new file mode 100644
index 000000000..5baa4d9a5
--- /dev/null
+++ b/src/mem/ruby/system/WeightedLRUPolicy.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Derek Hower
+ */
+
+#include "mem/ruby/system/WeightedLRUPolicy.hh"
+
+WeightedLRUPolicy::WeightedLRUPolicy(const Params* p)
+    : AbstractReplacementPolicy(p), m_cache(p->cache)
+{
+    m_last_occ_ptr = new int*[m_num_sets];
+    for(unsigned i = 0; i < m_num_sets; i++){
+        m_last_occ_ptr[i] = new int[m_assoc];
+        for(unsigned j = 0; j < m_assoc; j++){
+            m_last_occ_ptr[i][j] = 0;
+        }
+    }
+}
+
+WeightedLRUPolicy *
+WeightedLRUReplacementPolicyParams::create()
+{
+    return new WeightedLRUPolicy(this);
+}
+
+WeightedLRUPolicy::~WeightedLRUPolicy()
+{
+    if (m_last_occ_ptr != NULL){
+        for (unsigned i = 0; i < m_num_sets; i++){
+            if (m_last_occ_ptr[i] != NULL){
+                delete[] m_last_occ_ptr[i];
+            }
+        }
+        delete[] m_last_occ_ptr;
+    }
+}
+
+void
+WeightedLRUPolicy::touch(int64_t set, int64_t index, Tick time)
+{
+    assert(index >= 0 && index < m_assoc);
+    assert(set >= 0 && set < m_num_sets);
+
+    m_last_ref_ptr[set][index] = time;
+}
+
+void
+WeightedLRUPolicy::touch(int64_t set, int64_t index, Tick time, int occupancy)
+{
+    assert(index >= 0 && index < m_assoc);
+    assert(set >= 0 && set < m_num_sets);
+
+    m_last_ref_ptr[set][index] = time;
+    m_last_occ_ptr[set][index] = occupancy;
+}
+
+int64_t
+WeightedLRUPolicy::getVictim(int64_t set) const
+{
+    Tick time, smallest_time;
+    int64_t smallest_index;
+
+    smallest_index = 0;
+    smallest_time = m_last_ref_ptr[set][0];
+    int smallest_weight = m_last_ref_ptr[set][0];
+
+    for (unsigned i = 1; i < m_assoc; i++) {
+
+        int weight = m_last_occ_ptr[set][i];
+        if (weight < smallest_weight) {
+            smallest_weight = weight;
+            smallest_index = i;
+            smallest_time = m_last_ref_ptr[set][i];
+        } else if (weight == smallest_weight) {
+            time = m_last_ref_ptr[set][i];
+            if (time < smallest_time) {
+                smallest_index = i;
+                smallest_time = time;
+            }
+        }
+    }
+    return smallest_index;
+}
diff --git a/src/mem/ruby/system/WeightedLRUPolicy.hh b/src/mem/ruby/system/WeightedLRUPolicy.hh
new file mode 100644
index 000000000..3150779b2
--- /dev/null
+++ b/src/mem/ruby/system/WeightedLRUPolicy.hh
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#ifndef __MEM_RUBY_SYSTEM_WEIGHTEDLRUPOLICY_HH__
+#define __MEM_RUBY_SYSTEM_WEIGHTEDLRUPOLICY_HH__
+
+#include "mem/ruby/structures/AbstractReplacementPolicy.hh"
+#include "mem/ruby/structures/CacheMemory.hh"
+#include "params/WeightedLRUReplacementPolicy.hh"
+
+/* Simple true LRU replacement policy */
+
+class WeightedLRUPolicy : public AbstractReplacementPolicy
+{
+  public:
+    typedef WeightedLRUReplacementPolicyParams Params;
+    WeightedLRUPolicy(const Params* p);
+    ~WeightedLRUPolicy();
+
+    void touch(int64_t set, int64_t way, Tick time);
+    void touch(int64_t set, int64_t way, Tick time, int occupancy);
+    int64_t getVictim(int64_t set) const override;
+
+    bool useOccupancy() const { return true; }
+
+    CacheMemory * m_cache;
+    int **m_last_occ_ptr;
+};
+
+#endif // __MEM_RUBY_SYSTEM_WeightedLRUPolicy_HH__
diff --git a/src/mem/ruby/system/WeightedLRUReplacementPolicy.py b/src/mem/ruby/system/WeightedLRUReplacementPolicy.py
new file mode 100644
index 000000000..e7de33496
--- /dev/null
+++ b/src/mem/ruby/system/WeightedLRUReplacementPolicy.py
@@ -0,0 +1,45 @@
+#
+#  Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+#  All rights reserved.
+#
+#  For use for simulation and test purposes only
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are met:
+#
+#  1. Redistributions of source code must retain the above copyright notice,
+#  this list of conditions and the following disclaimer.
+#
+#  2. Redistributions in binary form must reproduce the above copyright notice,
+#  this list of conditions and the following disclaimer in the documentation
+#  and/or other materials provided with the distribution.
+#
+#  3. Neither the name of the copyright holder nor the names of its contributors
+#  may be used to endorse or promote products derived from this software
+#  without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#  POSSIBILITY OF SUCH DAMAGE.
+#
+#  Author: Derek Hower
+#
+
+from m5.params import *
+from m5.proxy import *
+from MemObject import MemObject
+from ReplacementPolicy import ReplacementPolicy
+
+class WeightedLRUReplacementPolicy(ReplacementPolicy):
+    type = "WeightedLRUReplacementPolicy"
+    cxx_class = "WeightedLRUPolicy"
+    cxx_header = "mem/ruby/system/WeightedLRUPolicy.hh"
+    cache = Param.RubyCache("")
diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py
index a530307ee..fc3f32c3d 100644
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -35,13 +35,17 @@ import re
 
 python_class_map = {
                     "int": "Int",
+                    "NodeID": "Int",
                     "uint32_t" : "UInt32",
                     "std::string": "String",
                     "bool": "Bool",
                     "CacheMemory": "RubyCache",
                     "WireBuffer": "RubyWireBuffer",
                     "Sequencer": "RubySequencer",
+                    "GPUCoalescer" : "RubyGPUCoalescer",
+                    "VIPERCoalescer" : "VIPERCoalescer",
                     "DirectoryMemory": "RubyDirectoryMemory",
+                    "PerfectCacheMemory": "RubyPerfectCacheMemory",
                     "MemoryControl": "MemoryControl",
                     "MessageBuffer": "MessageBuffer",
                     "DMASequencer": "DMASequencer",
@@ -305,7 +309,7 @@ class $c_ident : public AbstractController
     void collateStats();
 
     void recordCacheTrace(int cntrl, CacheRecorder* tr);
-    Sequencer* getSequencer() const;
+    Sequencer* getCPUSequencer() const;
 
     int functionalWriteBuffers(PacketPtr&);
 
@@ -527,8 +531,14 @@ $c_ident::$c_ident(const Params *p)
             else:
                 code('m_${{param.ident}} = p->${{param.ident}};')
 
-            if re.compile("sequencer").search(param.ident):
-                code('m_${{param.ident}}_ptr->setController(this);')
+            if re.compile("sequencer").search(param.ident) or \
+                   param.type_ast.type.c_ident == "GPUCoalescer" or \
+                   param.type_ast.type.c_ident == "VIPERCoalescer":
+                code('''
+if (m_${{param.ident}}_ptr != NULL) {
+    m_${{param.ident}}_ptr->setController(this);
+}
+''')
 
         code('''
 
@@ -670,6 +680,28 @@ $c_ident::init()
                 assert(param.pointer)
                 seq_ident = "m_%s_ptr" % param.ident
 
+        if seq_ident != "NULL":
+            code('''
+Sequencer*
+$c_ident::getCPUSequencer() const
+{
+    if (NULL != $seq_ident && $seq_ident->isCPUSequencer()) {
+        return $seq_ident;
+    } else {
+        return NULL;
+    }
+}
+''')
+        else:
+            code('''
+
+Sequencer*
+$c_ident::getCPUSequencer() const
+{
+    return NULL;
+}
+''')
+
         code('''
 
 void
@@ -796,12 +828,6 @@ $c_ident::getMemoryQueue() const
     return $memq_ident;
 }
 
-Sequencer*
-$c_ident::getSequencer() const
-{
-    return $seq_ident;
-}
-
 void
 $c_ident::print(ostream& out) const
 {
author	Tony Gutierrez <anthony.gutierrez@amd.com>	2016-01-19 14:28:22 -0500
committer	Tony Gutierrez <anthony.gutierrez@amd.com>	2016-01-19 14:28:22 -0500
commit	1a7d3f9fcb76a68540dd948f91413533a383bfde (patch)
tree	867510a147cd095f19499d26b7c02d27de4cae9d /src/mem
parent	28e353e0403ea379d244a418e8dc8ee0b48187cf (diff)
download	gem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz