From 877be2009c0871effb3494b52ce5221d58a594d6 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:32 -0800 Subject: ruby: Changes necessary to get the hammer protocol to work in GEM5 --- src/mem/protocol/MOESI_hammer-cache.sm | 178 +++++++++++++++++++-------------- 1 file changed, 103 insertions(+), 75 deletions(-) (limited to 'src/mem/protocol/MOESI_hammer-cache.sm') diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index d244a9b93..6fb5091af 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -1,5 +1,6 @@ /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 2009 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,9 +25,27 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * AMD's contributions to the MOESI hammer protocol do not constitute an + * endorsement of its similarity to any AMD products. + * + * Authors: Milo Martin + * Brad Beckmann */ -machine(L1Cache, "AMD Hammer-like protocol") { +machine(L1Cache, "AMD Hammer-like protocol") +: int cache_response_latency, + int issue_latency +{ + + // NETWORK BUFFERS + MessageBuffer requestFromCache, network="To", virtual_network="3", ordered="false"; + MessageBuffer responseFromCache, network="To", virtual_network="1", ordered="false"; + MessageBuffer unblockFromCache, network="To", virtual_network="0", ordered="false"; + + MessageBuffer forwardToCache, network="From", virtual_network="2", ordered="false"; + MessageBuffer responseToCache, network="From", virtual_network="1", ordered="false"; + // STATES enumeration(State, desc="Cache states", default="L1Cache_State_I") { @@ -82,14 +101,16 @@ machine(L1Cache, "AMD Hammer-like protocol") { // TYPES + // STRUCTURE DEFINITIONS + + MessageBuffer mandatoryQueue, ordered="false"; + Sequencer sequencer, factory='RubySystem::getSequencer(m_cfg["sequencer"])'; + // CacheEntry - structure(Entry, desc="...") { - Address Address, desc="Address of this block, required by CacheMemory"; - Time LastRef, desc="Last time this block was referenced, required by CacheMemory"; - AccessPermission Permission, desc="Access permission for this block, required by CacheMemory"; - DataBlock DataBlk, desc="data for the block, required by CacheMemory"; + structure(Entry, desc="...", interface="AbstractCacheEntry") { State CacheState, desc="cache state"; bool Dirty, desc="Is the data dirty (different than memory)?"; + DataBlock DataBlk, desc="data for the block"; } // TBE fields @@ -101,27 +122,28 @@ machine(L1Cache, "AMD Hammer-like protocol") { bool Sharers, desc="On a GetS, did we find any other sharers in the system"; } - external_type(NewCacheMemory) { + external_type(CacheMemory) { bool cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); bool isTagPresent(Address); + void profileMiss(CacheMsg); } - external_type(NewTBETable) { + external_type(TBETable) { TBE lookup(Address); void allocate(Address); void deallocate(Address); bool isPresent(Address); } - NewTBETable TBEs, template_hack=""; - NewCacheMemory L1IcacheMemory, template_hack="", 
constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,"L1I"'; - NewCacheMemory L1DcacheMemory, template_hack="", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,"L1D"'; - NewCacheMemory L2cacheMemory, template_hack="", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,"L2"'; + TBETable TBEs, template_hack=""; + CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])'; + CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])'; + CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])'; Entry getCacheEntry(Address addr), return_by_ref="yes" { if (L2cacheMemory.isTagPresent(addr)) { @@ -284,36 +306,36 @@ machine(L1Cache, "AMD Hammer-like protocol") { // ** INSTRUCTION ACCESS *** // Check to see if it is in the OTHER L1 - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.Address)) { - trigger(Event:L1_to_L2, in_msg.Address); + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress); } else { - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.LineAddress)); } } - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1IcacheMemory.cacheAvail(in_msg.Address)) { + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - if (L2cacheMemory.isTagPresent(in_msg.Address)) { + if (L2cacheMemory.isTagPresent(in_msg.LineAddress)) { // L2 has it (maybe not with the right permissions) - trigger(Event:L2_to_L1I, in_msg.Address); + trigger(Event:L2_to_L1I, in_msg.LineAddress); } else { // We have room, the L2 doesn't have it, so the L1 fetches the line - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } } else { // No room in the L1, so we need to make room - if (L2cacheMemory.cacheAvail(L1IcacheMemory.cacheProbe(in_msg.Address))) { + if (L2cacheMemory.cacheAvail(L1IcacheMemory.cacheProbe(in_msg.LineAddress))) { // The L2 has room, so we move the line from the L1 to the L2 - trigger(Event:L1_to_L2, L1IcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_to_L2, L1IcacheMemory.cacheProbe(in_msg.LineAddress)); } else { // The L2 does not have room, so we replace a line from the L2 - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1IcacheMemory.cacheProbe(in_msg.Address))); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1IcacheMemory.cacheProbe(in_msg.LineAddress))); } } } @@ -321,36 +343,36 @@ machine(L1Cache, "AMD Hammer-like protocol") { // *** DATA ACCESS *** // Check to see if it is in the OTHER L1 - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.Address)) { - trigger(Event:L1_to_L2, in_msg.Address); + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress); 
} else { - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.LineAddress)); } } - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1DcacheMemory.cacheAvail(in_msg.Address)) { + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - if (L2cacheMemory.isTagPresent(in_msg.Address)) { + if (L2cacheMemory.isTagPresent(in_msg.LineAddress)) { // L2 has it (maybe not with the right permissions) - trigger(Event:L2_to_L1D, in_msg.Address); + trigger(Event:L2_to_L1D, in_msg.LineAddress); } else { // We have room, the L2 doesn't have it, so the L1 fetches the line - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } } else { // No room in the L1, so we need to make room - if (L2cacheMemory.cacheAvail(L1DcacheMemory.cacheProbe(in_msg.Address))) { + if (L2cacheMemory.cacheAvail(L1DcacheMemory.cacheProbe(in_msg.LineAddress))) { // The L2 has room, so we move the line from the L1 to the L2 - trigger(Event:L1_to_L2, L1DcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_to_L2, L1DcacheMemory.cacheProbe(in_msg.LineAddress)); } else { // The L2 does not have room, so we replace a line from the L2 - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1DcacheMemory.cacheProbe(in_msg.Address))); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1DcacheMemory.cacheProbe(in_msg.LineAddress))); } } } @@ -362,33 +384,33 @@ machine(L1Cache, "AMD Hammer-like protocol") { // ACTIONS action(a_issueGETS, "a", desc="Issue GETS") { - enqueue(requestNetwork_out, RequestMsg, latency="ISSUE_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; - out_msg.Requestor := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Request_Control; - TBEs[address].NumPendingMsgs := numberOfNodes(); // One from each other processor (n-1) plus the memory (+1) + TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); // One from each other cache (n-1) plus the memory (+1) } } action(b_issueGETX, "b", desc="Issue GETX") { - enqueue(requestNetwork_out, RequestMsg, latency="ISSUE_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETX; - out_msg.Requestor := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Request_Control; - TBEs[address].NumPendingMsgs := numberOfNodes(); // One from each other processor (n-1) plus the memory (+1) + TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); // One from each other cache (n-1) plus the memory (+1) } } action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - 
enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -399,21 +421,21 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(d_issuePUT, "d", desc="Issue PUT") { - enqueue(requestNetwork_out, RequestMsg, latency="CACHE_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:PUT; - out_msg.Requestor := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } action(e_sendData, "e", desc="Send data from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -425,10 +447,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(ee_sendDataShared, "\e", desc="Send data from cache to requestor, keep a shared copy") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -440,10 +462,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(f_sendAck, "f", desc="Send ack from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.Acks := 1; out_msg.MessageSize := MessageSizeType:Response_Control; @@ -453,10 +475,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(ff_sendAckShared, "\f", desc="Send shared ack from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK_SHARED; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.Acks := 1; out_msg.MessageSize := MessageSizeType:Response_Control; @@ -465,11 +487,11 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(g_sendUnblock, "g", desc="Send unblock to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, 
latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:UNBLOCK; - out_msg.Sender := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Unblock_Control; } } @@ -541,10 +563,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := TBEs[address].DataBlk; out_msg.Dirty := TBEs[address].Dirty; @@ -555,10 +577,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(qq_sendDataFromTBEToMemory, "\q", desc="Send data from TBE to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; - out_msg.Sender := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.Dirty := TBEs[address].Dirty; if (TBEs[address].Dirty) { out_msg.Type := CoherenceResponseType:WB_DIRTY; @@ -583,10 +605,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(t_sendExclusiveDataFromTBEToMemory, "t", desc="Send exclusive data from TBE to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; - out_msg.Sender := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.DataBlk := TBEs[address].DataBlk; out_msg.Dirty := TBEs[address].Dirty; if (TBEs[address].Dirty) { @@ -628,18 +650,18 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") { if (L1DcacheMemory.isTagPresent(address) == false) { - L1DcacheMemory.allocate(address); + L1DcacheMemory.allocate(address, new Entry); } } action(jj_allocateL1ICacheBlock, "\j", desc="Set L1 I-cache tag equal to tag of block B.") { if (L1IcacheMemory.isTagPresent(address) == false) { - L1IcacheMemory.allocate(address); + L1IcacheMemory.allocate(address, new Entry); } } action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") { - L2cacheMemory.allocate(address); + L2cacheMemory.allocate(address, new Entry); } action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") { @@ -664,7 +686,13 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(uu_profileMiss, "\u", desc="Profile the demand miss") { peek(mandatoryQueue_in, CacheMsg) { - profile_miss(in_msg, id); + if (L1IcacheMemory.isTagPresent(address)) { + L1IcacheMemory.profileMiss(in_msg); + } else if (L1DcacheMemory.isTagPresent(address)) { + L1DcacheMemory.profileMiss(in_msg); + } else { + L2cacheMemory.profileMiss(in_msg); + } } } -- cgit v1.2.3
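
The mandatory-queue in_port above enforces L1/L2 exclusion: a block lives in exactly one of the L1 I-cache, the L1 D-cache, or the unified L2, so a demand access may first have to demote a line to the L2 (L1_to_L2), pull one up (L2_to_L1D / L2_to_L1I), or evict from the L2 (L2_Replacement) before the request itself can fire. A minimal Python sketch of the data-access arm of that decision, using a made-up CacheModel class that only stands in for Ruby's CacheMemory in this illustration:

    class CacheModel:
        """Toy tag store; a stand-in for Ruby's CacheMemory in this sketch only."""
        def __init__(self, name, capacity):
            self.name = name
            self.capacity = capacity
            self.tags = set()

        def is_tag_present(self, addr):
            return addr in self.tags

        def cache_avail(self, addr):
            # Simplified fully-associative check; the real CacheMemory checks the set.
            return len(self.tags) < self.capacity

        def cache_probe(self, addr):
            # Arbitrary victim; Ruby consults the replacement policy here.
            return next(iter(self.tags))

    def next_data_event(addr, l1i, l1d, l2):
        """Simplified mirror of the *** DATA ACCESS *** branch of the mandatory
        queue: return the one event this sketch would trigger next; the demand
        request is retried after that transition completes."""
        if l1i.is_tag_present(addr):              # block sits in the wrong L1
            if l2.cache_avail(addr):
                return ("L1_to_L2", addr)
            return ("L2_Replacement", l2.cache_probe(addr))
        if l1d.is_tag_present(addr):
            return ("demand_access", addr)        # L1D hit: mandatory_request_type_to_event
        if l1d.cache_avail(addr):
            if l2.is_tag_present(addr):
                return ("L2_to_L1D", addr)        # move the line up from the L2
            return ("demand_access", addr)        # fetch straight into the free L1D slot
        victim = l1d.cache_probe(addr)            # L1D is full: make room first
        if l2.cache_avail(victim):
            return ("L1_to_L2", victim)           # demote the victim to the L2
        return ("L2_Replacement", l2.cache_probe(victim))

    l1i = CacheModel("L1I", 2)
    l1d = CacheModel("L1D", 2)
    l2 = CacheModel("L2", 4)
    l1d.tags.update({0x40, 0x80})                 # L1D full, L2 empty
    print(next_data_event(0xC0, l1i, l1d, l2))    # -> ('L1_to_L2', <victim addr>)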
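
The GETS/GETX actions seed TBEs[address].NumPendingMsgs with getNumberOfLastLevelCaches() because, in this broadcast protocol, the requester waits for a response from every other cache (n - 1) plus one from memory (+1) before the request can complete, and each ACK/ACK_SHARED sent by f_sendAck / ff_sendAckShared carries Acks := 1. A toy countdown of that accounting (plain Python, no gem5 calls; it simplifies by counting every response as one):

    def expected_responses(num_last_level_caches):
        # (n - 1) peer caches + 1 memory response == n, the value written into
        # TBEs[address].NumPendingMsgs by a_issueGETS / b_issueGETX.
        return num_last_level_caches

    pending = expected_responses(4)      # e.g. a 4-cache Hammer system
    for sender in ("cache1", "cache2", "cache3", "memory"):
        pending -= 1                     # one ack or data response per sender
        print(sender, "->", pending, "still pending")
    assert pending == 0                  # all responses in: the request completes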