summaryrefslogtreecommitdiff
path: root/src/mem/protocol/MOESI_hammer-cache.sm
diff options
context:
space:
mode:
author Brad Beckmann <Brad.Beckmann@amd.com> 2010-08-20 11:46:14 -0700
committer Brad Beckmann <Brad.Beckmann@amd.com> 2010-08-20 11:46:14 -0700
commit 855748030032dc09a054a204ec93f16c91ee1577 (patch)
tree c6d842e2d95e4e84ba6797018027e5ffb40c2b71 /src/mem/protocol/MOESI_hammer-cache.sm
parent 908364a1c9239ad15d166720fdd89fe9f65d9331 (diff)
download gem5-855748030032dc09a054a204ec93f16c91ee1577.tar.xz
ruby: Added merge GETS optimization to hammer
Added an optimization that merges multiple pending GETS requests into a single request to the owner node.
Diffstat (limited to 'src/mem/protocol/MOESI_hammer-cache.sm')
-rw-r--r--src/mem/protocol/MOESI_hammer-cache.sm75
1 files changed, 74 insertions, 1 deletions
diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm
index ae74e284f..667b4ffcb 100644
--- a/src/mem/protocol/MOESI_hammer-cache.sm
+++ b/src/mem/protocol/MOESI_hammer-cache.sm
@@ -95,6 +95,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
// Requests
Other_GETX, desc="A GetX from another processor";
Other_GETS, desc="A GetS from another processor";
+ Merged_GETS, desc="A Merged GetS from another processor";
Other_GETS_No_Mig, desc="A GetS from another processor";
Invalidate, desc="Invalidate block";
@@ -136,6 +137,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
bool Sharers, desc="On a GetS, did we find any other sharers in the system";
MachineID LastResponder, desc="last machine to send a response for this request";
+ MachineID CurOwner, desc="current owner of the block, used for UnblockS responses";
Time InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
Time ForwardRequestTime, default="0", desc="time the dir forwarded the request";
Time FirstResponseTime, default="0", desc="the time the first response was received";
@@ -286,6 +288,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
peek(forwardToCache_in, RequestMsg, block_on="Address") {
if (in_msg.Type == CoherenceRequestType:GETX) {
trigger(Event:Other_GETX, in_msg.Address);
+ } else if (in_msg.Type == CoherenceRequestType:MERGED_GETS) {
+ trigger(Event:Merged_GETS, in_msg.Address);
} else if (in_msg.Type == CoherenceRequestType:GETS) {
if (isCacheTagPresent(in_msg.Address)) {
if (getCacheEntry(in_msg.Address).AtomicAccessed && no_mig_atomic) {
@@ -518,6 +522,24 @@ machine(L1Cache, "AMD Hammer-like protocol")
}
}
+ action(em_sendDataSharedMultiple, "em", desc="Send data from cache to all requestors") { // Builds one DATA_SHARED response from this cache's entry and addresses it to every requestor listed in the forwarded request.
+ peek(forwardToCache_in, RequestMsg) { // in_msg = request at the head of the forward queue; this action only reads it, it does not dequeue
+ enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceResponseType:DATA_SHARED;
+ out_msg.Sender := machineID;
+ out_msg.Destination := in_msg.MergedRequestors; // the whole requestor set is copied from the request, so one enqueued message carries all destinations
+ out_msg.DataBlk := getCacheEntry(address).DataBlk; // data comes from the cache entry (compare qm_sendDataFromTBEToCache, which reads the TBE)
+ DEBUG_EXPR(out_msg.DataBlk);
+ out_msg.Dirty := getCacheEntry(address).Dirty;
+ out_msg.Acks := machineCount(MachineType:L1Cache); // NOTE(review): presumably lets this one response stand in for the acks of all L1 caches -- confirm against the requestor's pending-message accounting
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime; // timing fields are passed through unchanged from the request
+ out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
+ }
+ }
+ }
+
action(f_sendAck, "f", desc="Send ack from cache to requestor") {
peek(forwardToCache_in, RequestMsg) {
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
@@ -575,6 +597,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:UNBLOCKS;
out_msg.Sender := machineID;
+ out_msg.CurOwner := TBEs[address].CurOwner;
out_msg.Destination.add(map_Address_to_Directory(address));
out_msg.MessageSize := MessageSizeType:Unblock_Control;
}
@@ -690,6 +713,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
}
}
}
+ action(uo_updateCurrentOwner, "uo", desc="When moving SS state, update current owner.") { // Stores the sender of the response being processed in the TBE's CurOwner field.
+ peek(responseToCache_in, ResponseMsg) { // in_msg = response at the head of the response queue; read only, not dequeued here
+ TBEs[address].CurOwner := in_msg.Sender; // the UNBLOCKS message builder later copies TBEs[address].CurOwner into out_msg.CurOwner; NOTE(review): assumes a TBE exists for this address -- confirm for every transition that invokes this action
+ }
+ }
action(n_popResponseQueue, "n", desc="Pop response queue") {
responseToCache_in.dequeue();
@@ -745,6 +773,24 @@ machine(L1Cache, "AMD Hammer-like protocol")
}
}
+ action(qm_sendDataFromTBEToCache, "qm", desc="Send data from TBE to cache, multiple sharers") { // Builds one data response from the TBE copy of the block and addresses it to every requestor listed in the forwarded request.
+ peek(forwardToCache_in, RequestMsg) { // in_msg = request at the head of the forward queue; this action only reads it, it does not dequeue
+ enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceResponseType:DATA; // NOTE(review): em_sendDataSharedMultiple answers the same Merged_GETS event with DATA_SHARED -- confirm DATA is intended here
+ out_msg.Sender := machineID;
+ out_msg.Destination := in_msg.MergedRequestors; // the whole requestor set is copied from the request, so one enqueued message carries all destinations
+ DEBUG_EXPR(out_msg.Destination);
+ out_msg.DataBlk := TBEs[address].DataBlk; // data and dirty bit come from the TBE, not from the cache entry
+ out_msg.Dirty := TBEs[address].Dirty;
+ out_msg.Acks := machineCount(MachineType:L1Cache); // NOTE(review): presumably lets this one response stand in for the acks of all L1 caches -- confirm against the requestor's pending-message accounting
+ out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.InitialRequestTime := in_msg.InitialRequestTime; // timing fields are passed through unchanged from the request
+ out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
+ }
+ }
+ }
+
action(qq_sendDataFromTBEToMemory, "\q", desc="Send data from TBE to memory") {
enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
out_msg.Address := address;
@@ -899,7 +945,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
zz_recycleMandatoryQueue;
}
- transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
+ transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate}) { // Merged_GETS joins the forwarded-request events that run no actions in these states (no state change, nothing popped)
// stall
}
@@ -1111,6 +1157,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
l_popForwardQueue;
}
+ transition(O, Merged_GETS) { // no destination state is given, so the block stays in O
+ em_sendDataSharedMultiple; // answer every merged requestor from the cache entry
+ l_popForwardQueue; // the send action only peeked at the request; consume it now
+ }
+
// Transitions from Modified
transition(MM, {Load, Ifetch}) {
h_load_hit;
@@ -1143,6 +1194,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
l_popForwardQueue;
}
+ transition(MM, Merged_GETS, O) { // MM moves to O after supplying the data
+ em_sendDataSharedMultiple; // answer every merged requestor from the cache entry
+ l_popForwardQueue; // the send action only peeked at the request; consume it now
+ }
+
// Transitions from Dirty Exclusive
transition(M, {Load, Ifetch}) {
h_load_hit;
@@ -1170,6 +1226,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
l_popForwardQueue;
}
+ transition(M, Merged_GETS, O) { // M moves to O after supplying the data
+ em_sendDataSharedMultiple; // answer every merged requestor from the cache entry
+ l_popForwardQueue; // the send action only peeked at the request; consume it now
+ }
+
// Transitions from IM
transition(IM, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
@@ -1249,6 +1310,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
l_popForwardQueue;
}
+ transition(OM, Merged_GETS) { // no destination state is given, so the block stays in OM
+ em_sendDataSharedMultiple; // answer every merged requestor from the cache entry
+ l_popForwardQueue; // the send action only peeked at the request; consume it now
+ }
+
transition(OM, Ack) {
m_decrementNumberOfMessages;
o_checkForCompletion;
@@ -1287,6 +1353,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
m_decrementNumberOfMessages;
o_checkForCompletion;
hx_external_load_hit;
+ uo_updateCurrentOwner;
n_popResponseQueue;
}
@@ -1304,6 +1371,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
m_decrementNumberOfMessages;
o_checkForCompletion;
hx_external_load_hit;
+ uo_updateCurrentOwner;
n_popResponseQueue;
}
@@ -1385,6 +1453,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
l_popForwardQueue;
}
+ transition({OI, MI}, Merged_GETS, OI) { // both OI and MI end in OI after supplying the data
+ qm_sendDataFromTBEToCache; // these states answer from the TBE copy rather than from the cache entry
+ l_popForwardQueue; // the send action only peeked at the request; consume it now
+ }
+
transition(MI, Writeback_Ack, I) {
t_sendExclusiveDataFromTBEToMemory;
s_deallocateTBE;