summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLena Olson <lena@cs.wisc.edu>2015-09-16 20:18:40 -0500
committerLena Olson <lena@cs.wisc.edu>2015-09-16 20:18:40 -0500
commit3225379cc0ab38e559dd05a8b161d80f16e67d78 (patch)
tree4391ede90d9d26e8c0c354609f3d05f13713f8c7
parent950e431d8766a8cf3b897965c1726e6d2576c6dc (diff)
downloadgem5-3225379cc0ab38e559dd05a8b161d80f16e67d78.tar.xz
ruby: Add missing block deallocations in MOESI_hammer
Some blocks in MOESI_hammer were not getting deallocated when they were set to an idle state (e.g. by Invalidate or Other_GETX/Other_GETS messages). While functionally correct, this caused some bad effects on performance, such as blocks in state I in the L1s getting sent to the L2 upon eviction, in turn evicting valid blocks. Also, if a valid block was in the LRU position, that block could be evicted rather than a block in state I. This patch adds the missing deallocations. Committed by: Nilay Vaish <nilay@cs.wisc.edu>
-rw-r--r--src/mem/protocol/MOESI_hammer-cache.sm60
1 files changed, 24 insertions, 36 deletions
diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm
index 5d2383541..cfb2526a5 100644
--- a/src/mem/protocol/MOESI_hammer-cache.sm
+++ b/src/mem/protocol/MOESI_hammer-cache.sm
@@ -89,7 +89,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack";
MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack";
II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Other_GETS or Other_GETX, waiting for ack";
- IT, AccessPermission:Busy, "IT", desc="Invalid block transferring to L1";
ST, AccessPermission:Busy, "ST", desc="S block transferring to L1";
OT, AccessPermission:Busy, "OT", desc="O block transferring to L1";
MT, AccessPermission:Busy, "MT", desc="M block transferring to L1";
@@ -1243,6 +1242,20 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
unset_cache_entry();
}
+ action(gr_deallocateCacheBlock, "\gr", desc="Deallocate an L1 or L2 cache block.") {
+ if (L1Dcache.isTagPresent(address)) {
+ L1Dcache.deallocate(address);
+ }
+ else if (L1Icache.isTagPresent(address)){
+ L1Icache.deallocate(address);
+ }
+ else {
+ assert(L2cache.isTagPresent(address));
+ L2cache.deallocate(address);
+ }
+ unset_cache_entry();
+ }
+
action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
if (send_evictions) {
DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
@@ -1296,7 +1309,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
//*****************************************************
// Transitions for Load/Store/L2_Replacement from transient states
- transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
+ transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
zz_stallAndWaitMandatoryQueue;
}
@@ -1308,11 +1321,11 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
zz_stallAndWaitMandatoryQueue;
}
- transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) {
+ transition({IM, IS, OI, MI, II, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) {
zz_stallAndWaitMandatoryQueue;
}
- transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) {
+ transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F, IR, SR, OR, MR, MMR}, L1_to_L2) {
zz_stallAndWaitMandatoryQueue;
}
@@ -1324,7 +1337,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
zz_stallAndWaitMandatoryQueue;
}
- transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) {
+ transition({ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) {
z_stall;
}
@@ -1333,7 +1346,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
}
// Transitions moving data between the L1 and L2 caches
- transition({I, S, O, M, MM}, L1_to_L2) {
+ transition({S, O, M, MM}, L1_to_L2) {
i_allocateTBE;
gg_deallocateL1CacheBlock;
vv_allocateL2CacheBlock;
@@ -1341,16 +1354,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
s_deallocateTBE;
}
- transition(I, Trigger_L2_to_L1D, IT) {
- i_allocateTBE;
- rr_deallocateL2CacheBlock;
- ii_allocateL1DCacheBlock;
- nb_copyFromTBEToL1; // Not really needed for state I
- s_deallocateTBE;
- zz_stallAndWaitMandatoryQueue;
- ll_L2toL1Transfer;
- }
-
transition(S, Trigger_L2_to_L1D, ST) {
i_allocateTBE;
rr_deallocateL2CacheBlock;
@@ -1391,16 +1394,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
ll_L2toL1Transfer;
}
- transition(I, Trigger_L2_to_L1I, IT) {
- i_allocateTBE;
- rr_deallocateL2CacheBlock;
- jj_allocateL1ICacheBlock;
- nb_copyFromTBEToL1;
- s_deallocateTBE;
- zz_stallAndWaitMandatoryQueue;
- ll_L2toL1Transfer;
- }
-
transition(S, Trigger_L2_to_L1I, ST) {
i_allocateTBE;
rr_deallocateL2CacheBlock;
@@ -1441,11 +1434,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
ll_L2toL1Transfer;
}
- transition(IT, Complete_L2_to_L1, IR) {
- j_popTriggerQueue;
- kd_wakeUpDependents;
- }
-
transition(ST, Complete_L2_to_L1, SR) {
j_popTriggerQueue;
kd_wakeUpDependents;
@@ -1500,11 +1488,6 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
k_popMandatoryQueue;
}
- transition(I, L2_Replacement) {
- rr_deallocateL2CacheBlock;
- ka_wakeUpAllDependents;
- }
-
transition(I, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
f_sendAck;
l_popForwardQueue;
@@ -1564,6 +1547,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
transition(S, {Other_GETX, Invalidate}, I) {
f_sendAck;
forward_eviction_to_cpu;
+ gr_deallocateCacheBlock;
l_popForwardQueue;
}
@@ -1630,6 +1614,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
transition(O, {Other_GETX, Invalidate}, I) {
e_sendData;
forward_eviction_to_cpu;
+ gr_deallocateCacheBlock;
l_popForwardQueue;
}
@@ -1712,12 +1697,14 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
transition(MM, {Other_GETX, Invalidate}, I) {
c_sendExclusiveData;
forward_eviction_to_cpu;
+ gr_deallocateCacheBlock;
l_popForwardQueue;
}
transition(MM, Other_GETS, I) {
c_sendExclusiveData;
forward_eviction_to_cpu;
+ gr_deallocateCacheBlock;
l_popForwardQueue;
}
@@ -1778,6 +1765,7 @@ machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
transition(M, {Other_GETX, Invalidate}, I) {
c_sendExclusiveData;
forward_eviction_to_cpu;
+ gr_deallocateCacheBlock;
l_popForwardQueue;
}