summaryrefslogtreecommitdiff
path: root/src/mem/protocol/MOESI_hammer-dir.sm
diff options
context:
space:
mode:
Diffstat (limited to 'src/mem/protocol/MOESI_hammer-dir.sm')
-rw-r--r--src/mem/protocol/MOESI_hammer-dir.sm135
1 files changed, 130 insertions, 5 deletions
diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm
index 369f8784b..f364b5846 100644
--- a/src/mem/protocol/MOESI_hammer-dir.sm
+++ b/src/mem/protocol/MOESI_hammer-dir.sm
@@ -88,6 +88,9 @@ machine(Directory, "AMD Hammer-like protocol")
WB, AccessPermission:Invalid, desc="Blocked on a writeback";
WB_O_W, AccessPermission:Invalid, desc="Blocked on memory write, will go to O";
WB_E_W, AccessPermission:Invalid, desc="Blocked on memory write, will go to E";
+
+ NO_F, AccessPermission:Invalid, desc="Blocked on a flush";
+ NO_F_W, AccessPermission:Invalid, desc="Not Owner, Blocked, waiting for Dram";
}
// Events
@@ -126,6 +129,8 @@ machine(Directory, "AMD Hammer-like protocol")
All_acks_and_owner_data, desc="Received shared data and message acks";
All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy";
All_Unblocks, desc="Received all unblocks for a merged gets request";
+ GETF, desc="A GETF arrives";
+ PUTF, desc="A PUTF arrives";
}
// TYPES
@@ -233,6 +238,8 @@ machine(Directory, "AMD Hammer-like protocol")
return Event:GETS;
} else if (type == CoherenceRequestType:GETX) {
return Event:GETX;
+ } else if (type == CoherenceRequestType:GETF) {
+ return Event:GETF;
} else {
error("Invalid CoherenceRequestType");
}
@@ -355,6 +362,8 @@ machine(Directory, "AMD Hammer-like protocol")
TBE tbe := TBEs[in_msg.Address];
if (in_msg.Type == CoherenceRequestType:PUT) {
trigger(Event:PUT, in_msg.Address, pf_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:PUTF) {
+ trigger(Event:PUTF, in_msg.Address, pf_entry, tbe);
} else {
if (probe_filter_enabled || full_bit_dir_enabled) {
if (is_valid(pf_entry)) {
@@ -453,6 +462,20 @@ machine(Directory, "AMD Hammer-like protocol")
}
}
+ action(oc_sendBlockAck, "oc", desc="Send block ack to the owner") {
+ peek(requestQueue_in, RequestMsg) { // NOTE(review): cache_entry.Owner is read below without an is_valid(cache_entry) check -- confirm the entry is always valid here when a probe filter / full-bit directory is enabled
+ if ((probe_filter_enabled || full_bit_dir_enabled) && (in_msg.Requestor == cache_entry.Owner)) { // ack only when the requestor is the recorded owner; nothing is sent when neither option is enabled
+ enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceRequestType:BLOCK_ACK;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.add(in_msg.Requestor); // the ack is sent back to the requestor itself
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+ }
+
action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") {
peek(requestQueue_in, RequestMsg) {
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
@@ -966,6 +989,42 @@ machine(Directory, "AMD Hammer-like protocol")
}
}
+ action(nofc_forwardRequestConditionalOwner, "nofc", desc="Forward request to one or more nodes if the requestor is not the owner") {
+ assert(machineCount(MachineType:L1Cache) > 1);
+ // With a probe filter or full-bit directory, probe only the recorded owner; otherwise broadcast to the other L1 caches.
+ if (probe_filter_enabled || full_bit_dir_enabled) {
+ peek(requestQueue_in, RequestMsg) {
+ assert(is_valid(cache_entry)); // validate the entry before cache_entry.Owner is first read
+ if (in_msg.Requestor != cache_entry.Owner) { // nothing is forwarded when the requestor is the recorded owner
+ enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+ out_msg.Address := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.add(cache_entry.Owner);
+ out_msg.MessageSize := MessageSizeType:Request_Control;
+ out_msg.DirectedProbe := true;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := get_time();
+ }
+ }
+ }
+ } else {
+ peek(requestQueue_in, RequestMsg) {
+ enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+ out_msg.Address := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
+ out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
+ out_msg.MessageSize := MessageSizeType:Broadcast_Control;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+ out_msg.ForwardRequestTime := get_time();
+ }
+ }
+ }
+
+ }
+
action(f_forwardWriteFromDma, "fw", desc="Forward requests") {
assert(is_valid(tbe));
if (tbe.NumPendingMsgs > 0) {
@@ -1185,6 +1244,16 @@ machine(Directory, "AMD Hammer-like protocol")
i_popIncomingRequestQueue;
}
+ transition(E, GETF, NO_F_W) { // GETF in E: move to NO_F_W ("Not Owner, Blocked, waiting for Dram")
+ pfa_probeFilterAllocate;
+ v_allocateTBE;
+ rx_recordExclusiveInTBE;
+ saa_setAcksToAllIfPF;
+ qf_queueMemoryFetchRequest;
+ fn_forwardRequestIfNecessary;
+ i_popIncomingRequestQueue; // consume the request last, after the other actions have run
+ }
+
transition(E, GETS, NO_B_W) {
pfa_probeFilterAllocate;
v_allocateTBE;
@@ -1223,6 +1292,17 @@ machine(Directory, "AMD Hammer-like protocol")
i_popIncomingRequestQueue;
}
+ transition(O, GETF, NO_F_W) { // GETF in O: move to NO_F_W ("Not Owner, Blocked, waiting for Dram")
+ r_setMRU;
+ v_allocateTBE;
+ r_recordDataInTBE;
+ sa_setAcksToOne;
+ qf_queueMemoryFetchRequest;
+ fb_forwardRequestBcast;
+ cs_clearSharers;
+ i_popIncomingRequestQueue; // consume the request last, after the other actions have run
+ }
+
// This transition is dumb, if a shared copy exists on-chip, then that should
// provide data, not slow off-chip dram. The problem is that the current
// caches don't provide data in S state
@@ -1286,6 +1366,13 @@ machine(Directory, "AMD Hammer-like protocol")
i_popIncomingRequestQueue;
}
+ transition(NX, GETF, NO_F) { // GETF in NX: move to NO_F ("Blocked on a flush"); same action list as the S/GETF transition
+ r_setMRU;
+ fb_forwardRequestBcast;
+ cs_clearSharers;
+ i_popIncomingRequestQueue; // consume the request last
+ }
+
// Transitions out of NO state
transition(NO, GETX, NO_B) {
r_setMRU;
@@ -1295,6 +1382,15 @@ machine(Directory, "AMD Hammer-like protocol")
i_popIncomingRequestQueue;
}
+ transition(NO, GETF, NO_F) { // GETF in NO: move to NO_F ("Blocked on a flush")
+ r_setMRU;
+ //ano_assertNotOwner; -- left disabled; presumably because a GETF requestor may itself be the owner (the two conditional actions below handle both cases) -- TODO confirm
+ nofc_forwardRequestConditionalOwner; // forward the request only if the requestor is not the owner
+ cs_clearSharers;
+ oc_sendBlockAck; // send a BLOCK_ACK if the requestor is the owner (only with a probe filter / full-bit directory enabled)
+ i_popIncomingRequestQueue; // consume the request last, since the actions above peek at it
+ }
+
transition(S, GETX, NO_B) {
r_setMRU;
fb_forwardRequestBcast;
@@ -1302,6 +1398,13 @@ machine(Directory, "AMD Hammer-like protocol")
i_popIncomingRequestQueue;
}
+ transition(S, GETF, NO_F) { // GETF in S: move to NO_F ("Blocked on a flush"); same action list as the NX/GETF transition
+ r_setMRU;
+ fb_forwardRequestBcast;
+ cs_clearSharers;
+ i_popIncomingRequestQueue; // consume the request last
+ }
+
transition(S, GETS, NO_B) {
r_setMRU;
ano_assertNotOwner;
@@ -1348,12 +1451,16 @@ machine(Directory, "AMD Hammer-like protocol")
// Blocked transient states
transition({NO_B_X, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W,
- NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R},
- {GETS, GETX, PUT, Pf_Replacement}) {
+ NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R, NO_F_W},
+ {GETS, GETX, GETF, PUT, Pf_Replacement}) {
z_stallAndWaitRequest;
}
- transition(NO_B, GETX, NO_B_X) {
+ transition(NO_F, {GETS, GETX, GETF, PUT, Pf_Replacement}){ // while blocked on a flush, stall these requests; PUTF is not stalled and has its own transition out of NO_F
+ z_stallAndWaitRequest; // NOTE(review): NO_F is not in the {DMA_READ, DMA_WRITE} stall list visible in this diff (NO_F_W is) -- confirm DMA requests cannot arrive in NO_F
+ }
+
+ transition(NO_B, {GETX, GETF}, NO_B_X) {
z_stallAndWaitRequest;
}
@@ -1361,13 +1468,13 @@ machine(Directory, "AMD Hammer-like protocol")
z_stallAndWaitRequest;
}
- transition(NO_B_S, {GETX, PUT, Pf_Replacement}) {
+ transition(NO_B_S, {GETX, GETF, PUT, Pf_Replacement}) {
z_stallAndWaitRequest;
}
transition({NO_B_X, NO_B, NO_B_S, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W,
- NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R},
+ NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R, NO_F_W},
{DMA_READ, DMA_WRITE}) {
zd_stallAndWaitDMARequest;
}
@@ -1444,6 +1551,12 @@ machine(Directory, "AMD Hammer-like protocol")
l_popMemQueue;
}
+ transition(NO_F_W, Memory_Data, NO_F) { // memory data arrived: leave NO_F_W for NO_F ("Blocked on a flush")
+ d_sendData;
+ w_deallocateTBE; // releases the TBE allocated by v_allocateTBE in the E/O GETF transitions that enter NO_F_W
+ l_popMemQueue;
+ }
+
transition(NO_DR_B_W, Memory_Data, NO_DR_B) {
r_recordMemoryData;
o_checkForCompletion;
@@ -1738,4 +1851,16 @@ machine(Directory, "AMD Hammer-like protocol")
k_wakeUpDependents;
j_popIncomingUnblockQueue;
}
+
+ transition(NO_F, PUTF, WB) { // PUTF while blocked on a flush: move to WB ("Blocked on a writeback")
+ a_sendWriteBackAck;
+ i_popIncomingRequestQueue; // PUTF is triggered from the request queue, so pop it from there
+ }
+
+ // Handles a possible race between GETF and UnblockM. TODO: confirm whether this transition is still needed.
+ transition(NO_F, UnblockM) { // no next state is given, so the directory stays in NO_F
+ us_updateSharerIfFBD;
+ uo_updateOwnerIfPf;
+ j_popIncomingUnblockQueue; // unblock messages are popped from the unblock queue, not the request queue
+ }
}