From a2e98f191fe02ab4caa21c508401d42eefbc2119 Mon Sep 17 00:00:00 2001
From: Brad Beckmann <Brad.Beckmann@amd.com>
Date: Sat, 19 Mar 2011 14:17:48 -0700
Subject: MOESI_CMP_directory: significant dma bug fixes

---
 src/mem/protocol/MOESI_CMP_directory-L1cache.sm |  67 ++++++++--
 src/mem/protocol/MOESI_CMP_directory-L2cache.sm | 171 ++++++++++++++++++++----
 src/mem/protocol/MOESI_CMP_directory-dir.sm     |  27 +++-
 src/mem/protocol/MOESI_CMP_directory-dma.sm     |   4 +
 src/mem/protocol/MOESI_CMP_directory-msg.sm     |   1 -
 5 files changed, 233 insertions(+), 37 deletions(-)

(limited to 'src/mem')

diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
index 78641d014..310c3027a 100644
--- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
@@ -663,10 +663,27 @@ machine(L1Cache, "Directory protocol")
   }
 
 
+  action(ub_dmaUnblockL2Cache, "ub", desc="Send dma ack to l2 cache") {  // DMA_ACK response to the L2Cache picked by mapAddressToRange
+    peek(requestNetwork_in, RequestMsg) {  // in_msg itself is not referenced in this body
+      enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DMA_ACK;  // the L2 maps this type to Event:DmaAck
+        out_msg.Sender := machineID;
+        out_msg.SenderMachine := MachineType:L1Cache;
+        out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, 
+                l2_select_low_bit, l2_select_num_bits));
+        out_msg.Dirty := false;
+        out_msg.Acks := 1;
+        out_msg.MessageSize := MessageSizeType:Response_Control;  // no DataBlk is attached
+      }
+    }
+  }
+
   action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") {
     peek(requestNetwork_in, RequestMsg) {
       assert(is_valid(tbe));
-      if (in_msg.RequestorMachine == MachineType:L1Cache) {
+      if (in_msg.RequestorMachine == MachineType:L1Cache || 
+          in_msg.RequestorMachine == MachineType:DMA) {
         enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
           out_msg.Address := address;
           out_msg.Type := CoherenceResponseType:DATA;
@@ -895,11 +912,17 @@ machine(L1Cache, "Directory protocol")
     l_popForwardQueue;
   }
 
-  transition(S, {Fwd_GETS, Fwd_DMA}) {
+  transition(S, Fwd_GETS) {
     e_sendData;
     l_popForwardQueue;
   }
 
+  transition(S, Fwd_DMA) {  // split from Fwd_GETS: same data reply plus a DMA_ACK
+    e_sendData;
+    ub_dmaUnblockL2Cache;  // extra step versus Fwd_GETS: send DMA_ACK to the L2
+    l_popForwardQueue;
+  }
+
   // Transitions from Owned
   transition({O, OM}, {Load, Ifetch}) {
     h_load_hit;
@@ -924,11 +947,17 @@ machine(L1Cache, "Directory protocol")
     l_popForwardQueue;
   }
 
-  transition(O, {Fwd_GETS, Fwd_DMA}) {
+  transition(O, Fwd_GETS) {
     e_sendData;
     l_popForwardQueue;
   }
 
+  transition(O, Fwd_DMA) {  // split from Fwd_GETS: same data reply plus a DMA_ACK
+    e_sendData;
+    ub_dmaUnblockL2Cache;  // extra step versus Fwd_GETS: send DMA_ACK to the L2
+    l_popForwardQueue;
+  }
+
   // Transitions from MM
   transition({MM, MM_W}, {Load, Ifetch}) {
     h_load_hit;
@@ -957,8 +986,8 @@ machine(L1Cache, "Directory protocol")
   }
 
   transition(MM, Fwd_DMA, MM) {
-    //ee_sendDataExclusive;
     e_sendData;
+    ub_dmaUnblockL2Cache;
     l_popForwardQueue;
   }
 
@@ -995,8 +1024,9 @@ machine(L1Cache, "Directory protocol")
     l_popForwardQueue;
   }
 
-  transition(M, Fwd_DMA, M) {
+  transition(M, Fwd_DMA) {
     e_sendData;
+    ub_dmaUnblockL2Cache;
     l_popForwardQueue;
   }
 
@@ -1039,11 +1069,17 @@ machine(L1Cache, "Directory protocol")
     n_popResponseQueue;
   }
 
-  transition(SM, {Fwd_DMA, Fwd_GETS}) {
+  transition(SM, Fwd_GETS) {
     e_sendData;
     l_popForwardQueue;
   }
 
+  transition(SM, Fwd_DMA) {  // split from Fwd_GETS: same data reply plus a DMA_ACK
+    e_sendData;
+    ub_dmaUnblockL2Cache;  // extra step versus Fwd_GETS: send DMA_ACK to the L2
+    l_popForwardQueue;
+  }
+
   // Transitions from OM
   transition(OM, Own_GETX) {
     mm_decrementNumberOfMessages;
@@ -1058,8 +1094,14 @@ machine(L1Cache, "Directory protocol")
     l_popForwardQueue;
   }
 
-  transition(OM, {Fwd_DMA, Fwd_GETS}, OM) {
+  transition(OM, Fwd_GETS) {
+    e_sendData;
+    l_popForwardQueue;
+  }
+
+  transition(OM, Fwd_DMA) {
     e_sendData;
+    ub_dmaUnblockL2Cache;
     l_popForwardQueue;
   }
 
@@ -1119,8 +1161,9 @@ machine(L1Cache, "Directory protocol")
     l_popForwardQueue;
   }
 
-  transition(MI, Fwd_DMA, MI) {
+  transition(MI, Fwd_DMA) {
     q_sendDataFromTBEToCache;
+    ub_dmaUnblockL2Cache;
     l_popForwardQueue;
   }
 
@@ -1129,8 +1172,14 @@ machine(L1Cache, "Directory protocol")
     l_popForwardQueue;
   }
 
-  transition({SI, OI}, {Fwd_DMA, Fwd_GETS}) {
+  transition({SI, OI}, Fwd_GETS) {
+    q_sendDataFromTBEToCache;
+    l_popForwardQueue;
+  }
+
+  transition({SI, OI}, Fwd_DMA) {
     q_sendDataFromTBEToCache;
+    ub_dmaUnblockL2Cache;
     l_popForwardQueue;
   }
 
diff --git a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
index 059fcac71..297904c90 100644
--- a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
@@ -125,6 +125,13 @@ machine(L2Cache, "Token protocol")
     MII, AccessPermission:Busy, desc="Blocked, doing writeback, was M, got Fwd_GETX";
     OLSI, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS";
     ILSI, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS got Fwd_GETX";
+
+    // DMA blocking states
+    ILOSD, AccessPermission:Busy, desc="Blocked, waiting for DMA ack";
+    ILOSXD, AccessPermission:Busy, desc="Blocked, waiting for DMA ack";
+    ILOD, AccessPermission:Busy, desc="Blocked, waiting for DMA ack";
+    ILXD, AccessPermission:Busy, desc="Blocked, waiting for DMA ack";
+    ILOXD, AccessPermission:Busy, desc="Blocked, waiting for DMA ack";
   }
 
   // EVENTS
@@ -158,7 +165,7 @@ machine(L2Cache, "Token protocol")
     Unblock,         desc="Local L1 is telling L2 dir to unblock";
     Exclusive_Unblock,         desc="Local L1 is telling L2 dir to unblock";
 
-
+    DmaAck,          desc="DMA ack from local L1";
     // events initiated by this L2
     L2_Replacement,     desc="L2 Replacement", format="!r";
 
@@ -636,6 +643,9 @@ machine(L2Cache, "Token protocol")
             trigger(Event:L1_WBCLEANDATA, in_msg.Address,
                     cache_entry, TBEs[in_msg.Address]);
           }
+        } else if (in_msg.Type == CoherenceResponseType:DMA_ACK) {
+          trigger(Event:DmaAck, in_msg.Address,
+                  getCacheEntry(in_msg.Address), TBEs[in_msg.Address]);
         } else {
           error("Unexpected message");
         }
@@ -769,6 +779,26 @@ machine(L2Cache, "Token protocol")
     }
   }
 
+  action(cd_sendDataFromTBEToFwdDma, "cd", desc="Send data from TBE to forwarded DMA request") {
+    assert(is_valid(tbe));
+    peek(requestNetwork_in, RequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DATA;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);  // reply goes straight to the forwarded request's requestor
+        out_msg.DataBlk := tbe.DataBlk;
+        // out_msg.Dirty := tbe.Dirty;
+        // shared data should be clean
+        out_msg.Dirty := false;
+        out_msg.Acks := tbe.Fwd_GETX_ExtAcks;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+      }
+    }
+    DPRINTF(RubySlicc, "Address: %s, Data Block: %s\n",
+            address, tbe.DataBlk);
+  }
+
   action(c_sendDataFromTBEToFwdGETS, "ccc", desc="Send data from TBE to external GETX") {
     assert(is_valid(tbe));
     enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
@@ -1114,6 +1144,7 @@ machine(L2Cache, "Token protocol")
       assert(is_valid(tbe));
       tbe.DataBlk := in_msg.DataBlk;
       tbe.Dirty := in_msg.Dirty;
+      APPEND_TRANSITION_COMMENT(in_msg.Sender);
     }
   }
 
@@ -1148,6 +1179,21 @@ machine(L2Cache, "Token protocol")
     }
   }
 
+  action(jd_forwardDmaRequestToLocalOwner, "jd", desc="Forward dma request to local owner") {
+    // Relays the peeked request (address, type, requestor) to the local owner unchanged.
+    peek(requestNetwork_in, RequestMsg) {
+      enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
+        out_msg.Address := in_msg.Address;
+        out_msg.Type := in_msg.Type;
+        out_msg.Requestor := in_msg.Requestor;
+        out_msg.RequestorMachine := in_msg.RequestorMachine;
+        out_msg.Destination.add(getLocalOwner(cache_entry, in_msg.Address));
+        out_msg.MessageSize := MessageSizeType:Forwarded_Control;
+        out_msg.Acks := 0 - 1;  // i.e. -1; NOTE(review): presumably a sentinel ack count, confirm with the L1 handlers
+      }
+    }
+  }
+
 
   action(k_forwardLocalGETSToLocalSharer, "k", desc="Forward local request to local sharer/owner") {
     peek(L1requestNetwork_in, RequestMsg) {
@@ -1436,33 +1482,48 @@ machine(L2Cache, "Token protocol")
     responseNetwork_in.recycle();
   }
 
+  action(da_sendDmaAckUnblock, "da", desc="Send dma ack to global directory") {  // no peek: built only from address and machineID
+    enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
+      out_msg.Address := address;
+      out_msg.Type := CoherenceResponseType:DMA_ACK;  // the directory maps this type to Event:DMA_ACK
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Sender := machineID;
+      out_msg.SenderMachine := MachineType:L2Cache;
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+    }
+  }
+
 
 
   //*****************************************************
   // TRANSITIONS
   //*****************************************************
 
-  transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) {
+  transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) {
     zz_recycleL1RequestQueue;
   }
 
-  transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS}, {L1_GETX, L1_GETS}) {
+  transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS}) {
     zz_recycleL1RequestQueue;
   }
 
-  transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS,  IGS, IGM, IGMLS, IGMO, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS}, L2_Replacement) {
+  transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS,  IGS, IGM, IGMLS, IGMO, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) {
     zz_recycleResponseQueue;
   }
 
-  transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) {
+  transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) {
     zz_recycleRequestQueue;
   }
 
-  transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS}, {Inv}) {
+  transition({OGMIO, IGMIO, IGMO}, Fwd_DMA) {
     zz_recycleRequestQueue;
   }
 
-  transition({IGM, IGS}, {Own_GETX}) {
+  transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Inv}) {
+    zz_recycleRequestQueue;
+  }
+
+  transition({IGM, IGS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Own_GETX}) {
     zz_recycleRequestQueue;
   }
 
@@ -1527,20 +1588,70 @@ machine(L2Cache, "Token protocol")
     m_popRequestQueue;
   }
 
-  transition({ILOS, ILOSX}, Fwd_DMA) {
+  transition(ILOS, Fwd_DMA, ILOSD) {  // block in ILOSD until the DmaAck response arrives
+    i_allocateTBE;  // NOTE(review): the ILOSX/ILO/ILX/ILOX versions also run t_recordFwdSID here; confirm the omission is intended
+    jd_forwardDmaRequestToLocalOwner;  // hand the DMA request to the local owner
+    m_popRequestQueue;
+  }
+
+  transition(ILOSD, DmaAck, ILOS) {  // DMA ack from local L1: go back to the state held before Fwd_DMA
+    s_deallocateTBE;  // pairs with i_allocateTBE in the ILOS Fwd_DMA transition
+    da_sendDmaAckUnblock;  // pass a DMA_ACK on to the directory
+    n_popResponseQueue;
+  }
+
+  transition(ILOSX, Fwd_DMA, ILOSXD) {
     i_allocateTBE;
     t_recordFwdSID;
-    j_forwardGlobalRequestToLocalOwner;
+    jd_forwardDmaRequestToLocalOwner;
     m_popRequestQueue;
   }
 
-  transition({ILO, ILX, ILOX}, Fwd_DMA) {
+  transition(ILOSXD, DmaAck, ILOSX) {  // DMA ack from local L1: go back to the state held before Fwd_DMA
+    s_deallocateTBE;  // pairs with i_allocateTBE in the ILOSX Fwd_DMA transition
+    da_sendDmaAckUnblock;  // pass a DMA_ACK on to the directory
+    n_popResponseQueue;
+  }
+
+  transition(ILO, Fwd_DMA, ILOD) {
     i_allocateTBE;
     t_recordFwdSID;
-    j_forwardGlobalRequestToLocalOwner;
+    jd_forwardDmaRequestToLocalOwner;
     m_popRequestQueue;
   }
- 
+
+  transition(ILOD, DmaAck, ILO) {  // DMA ack from local L1: go back to the state held before Fwd_DMA
+    s_deallocateTBE;  // pairs with i_allocateTBE in the ILO Fwd_DMA transition
+    da_sendDmaAckUnblock;  // pass a DMA_ACK on to the directory
+    n_popResponseQueue;
+  }
+
+  transition(ILX, Fwd_DMA, ILXD) {
+    i_allocateTBE;
+    t_recordFwdSID;
+    jd_forwardDmaRequestToLocalOwner;
+    m_popRequestQueue;
+  }
+
+  transition(ILXD, DmaAck, ILX) {  // DMA ack from local L1: go back to the state held before Fwd_DMA
+    s_deallocateTBE;  // pairs with i_allocateTBE in the ILX Fwd_DMA transition
+    da_sendDmaAckUnblock;  // pass a DMA_ACK on to the directory
+    n_popResponseQueue;
+  }
+
+  transition(ILOX, Fwd_DMA, ILOXD) {
+    i_allocateTBE;
+    t_recordFwdSID;
+    jd_forwardDmaRequestToLocalOwner;
+    m_popRequestQueue;
+  }
+
+  transition(ILOXD, DmaAck, ILOX) {  // DMA ack from local L1: go back to the state held before Fwd_DMA
+    s_deallocateTBE;  // pairs with i_allocateTBE in the ILOX Fwd_DMA transition
+    da_sendDmaAckUnblock;  // pass a DMA_ACK on to the directory
+    n_popResponseQueue;
+  }
+
   transition({ILOS, ILOSX, ILO, ILX, ILOX, ILXW}, Data) {
     i_copyDataToTBE;
     c_sendDataFromTBEToFwdGETS;
@@ -1625,8 +1736,14 @@ machine(L2Cache, "Token protocol")
     m_popRequestQueue;
   }
 
-  transition({O, OLS}, {Fwd_GETS, Fwd_DMA}) {
+  transition({O, OLS}, Fwd_GETS) {
+    dd_sendDataToFwdGETS;
+    m_popRequestQueue;
+  }
+
+  transition({O, OLS}, Fwd_DMA) {
     dd_sendDataToFwdGETS;
+    da_sendDmaAckUnblock;
     m_popRequestQueue;
   }
 
@@ -1638,6 +1755,7 @@ machine(L2Cache, "Token protocol")
 
   transition(OLSX, Fwd_DMA) {
     dd_sendDataToFwdGETS;
+    da_sendDmaAckUnblock;
     m_popRequestQueue;
   }
 
@@ -1662,6 +1780,7 @@ machine(L2Cache, "Token protocol")
 
   transition(M, Fwd_DMA) {
      dd_sendExclusiveDataToFwdGETS;
+     da_sendDmaAckUnblock;
      m_popRequestQueue;
   }
 
@@ -1982,7 +2101,7 @@ machine(L2Cache, "Token protocol")
     o_popL1RequestQueue;
   }
 
-  transition(OGMIO, {Fwd_GETS, Fwd_DMA}) {
+  transition(OGMIO, Fwd_GETS) {
     t_recordFwdSID;
     c_sendDataFromTBEToFwdGETS;
     m_popRequestQueue;
@@ -2017,12 +2136,6 @@ machine(L2Cache, "Token protocol")
     m_popRequestQueue;
   }
 
-  transition(IGMIO, Fwd_DMA) {
-    t_recordFwdSID;
-    j_forwardGlobalRequestToLocalOwner;
-    m_popRequestQueue;
-  }
-
   transition(IGMIOFS, Data, IGMIO) {
     i_copyDataToTBE;
     c_sendDataFromTBEToFwdGETS;
@@ -2202,7 +2315,7 @@ machine(L2Cache, "Token protocol")
 
   }
 
-  transition(IGMO, {Fwd_GETS, Fwd_DMA}) {
+  transition(IGMO, Fwd_GETS) {
     t_recordFwdSID;
     c_sendDataFromTBEToFwdGETS;
     m_popRequestQueue;
@@ -2557,18 +2670,30 @@ machine(L2Cache, "Token protocol")
     n_popTriggerQueue;
   }
 
-  transition(OLSI, {Fwd_GETS, Fwd_DMA}) {
+  transition(OLSI, Fwd_GETS) {
     t_recordFwdSID;
     c_sendDataFromTBEToFwdGETS;
     m_popRequestQueue;
   }
 
-  transition({MI, OI}, {Fwd_GETS, Fwd_DMA}, OI) {
+  transition({MI, OI}, Fwd_GETS, OI) {
     t_recordFwdSID;
     c_sendDataFromTBEToFwdGETS;
     m_popRequestQueue;
   }
 
+  transition({MI, OI}, Fwd_DMA, OI) {  // same target state as the Fwd_GETS case, but no t_recordFwdSID
+    cd_sendDataFromTBEToFwdDma;  // data is taken from tbe.DataBlk and sent to the request's Requestor
+    da_sendDmaAckUnblock;  // the L2 sends the DMA_ACK to the directory in the same transition
+    m_popRequestQueue;
+  }
+
+  transition(OLSI, Fwd_DMA) {  // stays in OLSI; unlike the Fwd_GETS case there is no t_recordFwdSID
+    cd_sendDataFromTBEToFwdDma;  // data is taken from tbe.DataBlk and sent to the request's Requestor
+    da_sendDmaAckUnblock;  // the L2 sends the DMA_ACK to the directory in the same transition
+    m_popRequestQueue;
+  }
+
   transition({MI, OI}, Fwd_GETX, MII) {
     t_recordFwdXID;
     c_sendDataFromTBEToFwdGETX;
diff --git a/src/mem/protocol/MOESI_CMP_directory-dir.sm b/src/mem/protocol/MOESI_CMP_directory-dir.sm
index 42d46e501..bf5e8bff6 100644
--- a/src/mem/protocol/MOESI_CMP_directory-dir.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-dir.sm
@@ -70,6 +70,9 @@ machine(Directory, "Directory protocol")
     XI_M, AccessPermission:Busy, desc="In a stable state, going to I, waiting for the memory controller";
     XI_U, AccessPermission:Busy, desc="In a stable state, going to I, waiting for an unblock";
     OI_D, AccessPermission:Busy, desc="In O, going to I, waiting for data";
+
+    OD, AccessPermission:Busy, desc="In O, waiting for dma ack from L2";
+    MD, AccessPermission:Busy, desc="In M, waiting for dma ack from L2";
   }
 
   // Events
@@ -88,6 +91,7 @@ machine(Directory, "Directory protocol")
     Memory_Ack,    desc="Writeback Ack from memory arrives";
     DMA_READ,      desc="DMA Read";
     DMA_WRITE,     desc="DMA Write";
+    DMA_ACK,       desc="DMA Ack";
     Data,          desc="Data to directory";
   }
 
@@ -225,6 +229,9 @@ machine(Directory, "Directory protocol")
         } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
           trigger(Event:Data, in_msg.Address,
                   TBEs[in_msg.Address]);
+        } else if (in_msg.Type == CoherenceResponseType:DMA_ACK) {
+          trigger(Event:DMA_ACK, in_msg.Address,
+                  TBEs[in_msg.Address]);
         } else {
           error("Invalid message");
         }
@@ -295,6 +302,7 @@ machine(Directory, "Directory protocol")
         out_msg.Address := address;
         out_msg.Type := CoherenceRequestType:WB_NACK;
         out_msg.Requestor := in_msg.Requestor;
+        out_msg.RequestorMachine := MachineType:Directory;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
       }
@@ -365,6 +373,7 @@ machine(Directory, "Directory protocol")
         out_msg.Address := address;
         out_msg.Type := in_msg.Type;
         out_msg.Requestor := in_msg.Requestor;
+        out_msg.RequestorMachine := machineIDToMachineType(in_msg.Requestor);
         out_msg.Destination.addNetDest(getDirectoryEntry(in_msg.Address).Owner);
         out_msg.Acks := getDirectoryEntry(address).Sharers.count();
         if (getDirectoryEntry(address).Sharers.isElement(in_msg.Requestor)) {
@@ -381,6 +390,7 @@ machine(Directory, "Directory protocol")
         out_msg.Address := address;
         out_msg.Type := in_msg.Type;
         out_msg.Requestor := machineID;
+        out_msg.RequestorMachine := machineIDToMachineType(in_msg.Requestor);
         out_msg.Destination.addNetDest(getDirectoryEntry(in_msg.Address).Owner);
         out_msg.Acks := getDirectoryEntry(address).Sharers.count();
         if (getDirectoryEntry(address).Sharers.isElement(in_msg.Requestor)) {
@@ -399,6 +409,7 @@ machine(Directory, "Directory protocol")
           out_msg.Address := address;
           out_msg.Type := CoherenceRequestType:INV;
           out_msg.Requestor := in_msg.Requestor;
+          out_msg.RequestorMachine := machineIDToMachineType(in_msg.Requestor);
           // out_msg.Destination := getDirectoryEntry(in_msg.Address).Sharers;
           out_msg.Destination.addNetDest(getDirectoryEntry(in_msg.Address).Sharers);
           out_msg.Destination.remove(in_msg.Requestor);
@@ -632,7 +643,7 @@ machine(Directory, "Directory protocol")
     i_popIncomingRequestQueue;
   }
 
-  transition(S, DMA_READ, S) {
+  transition(S, DMA_READ) {
     //qf_queueMemoryFetchRequest;
     p_fwdDataToDMA;
     //g_sendInvalidations;  // the DMA will collect the invalidations then send an Unblock Exclusive
@@ -674,12 +685,16 @@ machine(Directory, "Directory protocol")
     i_popIncomingRequestQueue;
   }
 
-  transition(O, DMA_READ, O) {
+  transition(O, DMA_READ, OD) {
     f_forwardRequest;     // this will cause the data to go to DMA directly
     //g_sendInvalidations;  // this will cause acks to be sent to the DMA
     i_popIncomingRequestQueue;
   }
 
+  transition(OD, DMA_ACK, O) {  // OD is entered from O on DMA_READ; the ack returns the line to O
+    j_popIncomingUnblockQueue;
+  }
+
   transition({O,M}, DMA_WRITE, OI_D) {
     f_forwardRequestDirIsRequestor;    // need the modified data before we can proceed
     g_sendInvalidations;               // these go to the DMA Controller
@@ -708,11 +723,15 @@ machine(Directory, "Directory protocol")
   }
 
   // no exclusive unblock will show up to the directory
-  transition(M, DMA_READ, M) {
+  transition(M, DMA_READ, MD) {
     f_forwardRequest;     // this will cause the data to go to DMA directly
     i_popIncomingRequestQueue;
   }
 
+  transition(MD, DMA_ACK, M) {  // MD is entered from M on DMA_READ; the ack returns the line to M
+    j_popIncomingUnblockQueue;
+  }
+
   transition(M, GETS, MO) {
     f_forwardRequest;
     i_popIncomingRequestQueue;
@@ -745,7 +764,7 @@ machine(Directory, "Directory protocol")
   }
 
 
-  transition({MM, MO, MI, MIS, OS, OSS, XI_M, XI_U, OI_D}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE}) {
+  transition({MM, MO, MI, MIS, OS, OSS, XI_M, XI_U, OI_D, OD, MD}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE}) {
     zz_recycleRequest;
   }
 
diff --git a/src/mem/protocol/MOESI_CMP_directory-dma.sm b/src/mem/protocol/MOESI_CMP_directory-dma.sm
index 30a311f67..42e48c95a 100644
--- a/src/mem/protocol/MOESI_CMP_directory-dma.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-dma.sm
@@ -140,6 +140,7 @@ machine(DMA, "DMA Controller")
         out_msg.Len := in_msg.Len;
         out_msg.Destination.add(map_Address_to_Directory(address));
         out_msg.Requestor := machineID;
+        out_msg.RequestorMachine := MachineType:DMA;
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
       }
     }
@@ -154,6 +155,7 @@ machine(DMA, "DMA Controller")
           out_msg.Len := in_msg.Len;
           out_msg.Destination.add(map_Address_to_Directory(address));
           out_msg.Requestor := machineID;
+          out_msg.RequestorMachine := MachineType:DMA;
           out_msg.MessageSize := MessageSizeType:Writeback_Control;
         }
       }
@@ -185,6 +187,8 @@ machine(DMA, "DMA Controller")
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:UNBLOCK_EXCLUSIVE;
       out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.Sender := machineID;
+      out_msg.SenderMachine := MachineType:DMA;
       out_msg.MessageSize := MessageSizeType:Writeback_Control;
     }
   }
diff --git a/src/mem/protocol/MOESI_CMP_directory-msg.sm b/src/mem/protocol/MOESI_CMP_directory-msg.sm
index edbff0c96..c901fb4ff 100644
--- a/src/mem/protocol/MOESI_CMP_directory-msg.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-msg.sm
@@ -59,7 +59,6 @@ enumeration(CoherenceResponseType, desc="...") {
   WRITEBACK_CLEAN_DATA,   desc="Clean writeback (contains data)";
   WRITEBACK_CLEAN_ACK,   desc="Clean writeback (contains no data)";
   WRITEBACK_DIRTY_DATA,   desc="Dirty writeback (contains data)";
-
   DMA_ACK,           desc="Ack that a DMA write completed";
 }
 
-- 
cgit v1.2.3