From e9288b2cd35863c600d7ff7bf04f4c08e055e3e0 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Tue, 22 Sep 2009 15:24:16 -0700 Subject: scons: add slicc and ply to sys.path and PYTHONPATH so everyone has access --- src/mem/protocol/SConscript | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/SConscript b/src/mem/protocol/SConscript index 293346f13..700ab40ea 100644 --- a/src/mem/protocol/SConscript +++ b/src/mem/protocol/SConscript @@ -73,7 +73,6 @@ protocol = env['PROTOCOL'] sources = [ protocol_dir.File("RubySlicc_interfaces.slicc"), protocol_dir.File("%s.slicc" % protocol) ] -sys.path[0:0] = [env['ENV']['M5_PLY']] execfile(slicc_dir.File('parser/parser.py').srcnode().abspath) sm_files = read_slicc([s.srcnode().abspath for s in sources]) -- cgit v1.2.3 From 2278363015a2a5cc850b38213833096d33b496e8 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Tue, 22 Sep 2009 18:12:39 -0700 Subject: slicc: Pure python implementation of slicc. This is simply a translation of the C++ slicc into python with very minimal reorganization of the code. The output can be verified as nearly identical by doing a "diff -wBur". Slicc can easily be run manually by using util/slicc --- src/mem/protocol/SConscript | 82 +++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 28 deletions(-) (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/SConscript b/src/mem/protocol/SConscript index 700ab40ea..425219580 100644 --- a/src/mem/protocol/SConscript +++ b/src/mem/protocol/SConscript @@ -29,30 +29,56 @@ # Authors: Nathan Binkert import os -import re -import string import sys -from os.path import basename, dirname, exists, expanduser, isdir, isfile -from os.path import join as joinpath - -import SCons +from os.path import isdir, isfile, join as joinpath Import('*') if not env['RUBY']: Return() -slicc_dir = Dir('../slicc') protocol_dir = Dir('.') html_dir = Dir('html') +slicc_dir = Dir('../slicc') + +sys.path[1:1] = [ Dir('..').srcnode().abspath ] +from slicc.parser import SLICC + +slicc_depends = [] +for root,dirs,files in os.walk(slicc_dir.srcnode().abspath): + for f in files: + if f.endswith('.py'): + slicc_depends.append(File(joinpath(root, f))) # # Use SLICC # -def slicc_generator(target, source, env, for_signature): - slicc_bin = str(source[0]) - protocol = source[1].get_contents() + +def slicc_scanner(node, env, path): + contents = node.get_contents() + files = [ line.strip() for line in contents.splitlines() ] + return files + +env.Append(SCANNERS=Scanner(function=slicc_scanner,skeys=['.slicc'])) + +def slicc_emitter(target, source, env): + files = [s.srcnode().abspath for s in source[1:]] + slicc = SLICC(debug=True) + print "SLICC parsing..." + for name in slicc.load(files, verbose=True): + print " %s" % name + + hh,cc = slicc.files() + target.extend(sorted(hh)) + target.extend(sorted(cc)) + f = file('/tmp/asdf', 'w') + for t in target: + print >>f, t + return target, source + +def slicc_action(target, source, env): + protocol = source[0].get_contents() pdir = str(protocol_dir) hdir = str(html_dir) @@ -61,31 +87,31 @@ def slicc_generator(target, source, env, for_signature): if not isdir(hdir): os.mkdir(hdir) - do_html = "html" - cmdline = [ slicc_bin, pdir, hdir, protocol, do_html ] - cmdline += [ str(s) for s in source[2:] ] - cmdline = ' '.join(cmdline) - return cmdline + slicc = SLICC(debug=True) + files = [str(s) for s in source[1:]] + slicc.load(files, verbose=False) -slicc_builder = Builder(generator=slicc_generator) + print "SLICC Generator pass 1..." + slicc.findMachines() -protocol = env['PROTOCOL'] -sources = [ protocol_dir.File("RubySlicc_interfaces.slicc"), - protocol_dir.File("%s.slicc" % protocol) ] + print "SLICC Generator pass 2..." + slicc.generate() -execfile(slicc_dir.File('parser/parser.py').srcnode().abspath) + print "SLICC writing C++ files..." + slicc.writeCodeFiles(pdir) -sm_files = read_slicc([s.srcnode().abspath for s in sources]) -sm_files = [ protocol_dir.File(f) for f in sm_files ] + print "SLICC writing HTML files..." + slicc.writeHTMLFiles(hdir) -hh, cc = scan([s.srcnode().abspath for s in sm_files]) -hh = [ protocol_dir.File(f) for f in hh ] -cc = [ protocol_dir.File(f) for f in cc ] +slicc_builder = Builder(action=slicc_action, emitter=slicc_emitter) -slicc_bin = slicc_dir.File("slicc") +protocol = env['PROTOCOL'] +sources = [ protocol_dir.File("RubySlicc_interfaces.slicc"), + protocol_dir.File("%s.slicc" % protocol) ] env.Append(BUILDERS={'SLICC' : slicc_builder}) -env.SLICC(hh + cc, [ slicc_bin, Value(protocol) ] + sm_files) +nodes = env.SLICC([], [ Value(protocol) ] + sources) +env.Depends(nodes, slicc_depends) -for f in cc: +for f in sorted(s for s in nodes if str(s).endswith('.cc')): Source(f) -- cgit v1.2.3 From 058ccfc7fe7be1b7b7124ecdd0d9d79fe4d6a86f Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Thu, 5 Nov 2009 11:11:05 -0800 Subject: slicc: whack some of Nate's leftover debug code --- src/mem/protocol/SConscript | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/SConscript b/src/mem/protocol/SConscript index 425219580..948fd6c1a 100644 --- a/src/mem/protocol/SConscript +++ b/src/mem/protocol/SConscript @@ -72,9 +72,6 @@ def slicc_emitter(target, source, env): hh,cc = slicc.files() target.extend(sorted(hh)) target.extend(sorted(cc)) - f = file('/tmp/asdf', 'w') - for t in target: - print >>f, t return target, source def slicc_action(target, source, env): -- cgit v1.2.3 From 9098010e3fccf779786c7f0e1dfab9d522f72eb5 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Thu, 5 Nov 2009 11:11:06 -0800 Subject: slicc: tweak file enumeration for scons Right now .cc and .hh files are handled separately, but then they're just munged together at the end by scons, so it doesn't buy us anything. Might as well munge from the start since we'll eventually be adding generated Python files to the list too. --- src/mem/protocol/SConscript | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/SConscript b/src/mem/protocol/SConscript index 948fd6c1a..cd9920d22 100644 --- a/src/mem/protocol/SConscript +++ b/src/mem/protocol/SConscript @@ -69,9 +69,7 @@ def slicc_emitter(target, source, env): for name in slicc.load(files, verbose=True): print " %s" % name - hh,cc = slicc.files() - target.extend(sorted(hh)) - target.extend(sorted(cc)) + target.extend(sorted(slicc.files())) return target, source def slicc_action(target, source, env): -- cgit v1.2.3 From faf1d97f2447f0e3b9dce9fb06e9e87e496333a3 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 13:55:58 -0800 Subject: ruby: fixed dma mi example to work with multiple dma ports --- src/mem/protocol/MI_example-dir.sm | 24 +++++++++++++++--------- src/mem/protocol/MI_example-dma.sm | 2 ++ src/mem/protocol/MI_example-msg.sm | 1 + 3 files changed, 18 insertions(+), 9 deletions(-) (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/MI_example-dir.sm b/src/mem/protocol/MI_example-dir.sm index c045419b6..0061a2838 100644 --- a/src/mem/protocol/MI_example-dir.sm +++ b/src/mem/protocol/MI_example-dir.sm @@ -1,8 +1,6 @@ machine(Directory, "Directory protocol") -: int directory_latency, - int dma_select_low_bit, - int dma_select_num_bits +: int directory_latency { MessageBuffer forwardFromDir, network="To", virtual_network="2", ordered="false"; @@ -74,6 +72,7 @@ machine(Directory, "Directory protocol") State TBEState, desc="Transient State"; DataBlock DataBlk, desc="Data to be written (DMA write only)"; int Len, desc="..."; + MachineID DmaRequestor, desc="DMA requestor"; } external_type(TBETable) { @@ -243,8 +242,7 @@ machine(Directory, "Directory protocol") out_msg.LineAddress := address; out_msg.Type := DMAResponseType:DATA; out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be - out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA, - dma_select_low_bit, dma_select_num_bits)); + out_msg.Destination.add(TBEs[address].DmaRequestor); out_msg.MessageSize := MessageSizeType:Response_Data; } } @@ -259,8 +257,7 @@ machine(Directory, "Directory protocol") out_msg.LineAddress := address; out_msg.Type := DMAResponseType:DATA; out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be - out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA, - dma_select_low_bit, dma_select_num_bits)); + out_msg.Destination.add(TBEs[address].DmaRequestor); out_msg.MessageSize := MessageSizeType:Response_Data; } } @@ -271,8 +268,7 @@ machine(Directory, "Directory protocol") out_msg.PhysicalAddress := address; out_msg.LineAddress := address; out_msg.Type := DMAResponseType:ACK; - out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA, - dma_select_low_bit, dma_select_num_bits)); + out_msg.Destination.add(TBEs[address].DmaRequestor); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } @@ -343,6 +339,14 @@ machine(Directory, "Directory protocol") TBEs[address].DataBlk := in_msg.DataBlk; TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; TBEs[address].Len := in_msg.Len; + TBEs[address].DmaRequestor := in_msg.Requestor; + } + } + + action(r_allocateTbeForDmaRead, "\r", desc="Allocate TBE for DMA Read") { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + TBEs[address].DmaRequestor := in_msg.Requestor; } } @@ -485,6 +489,7 @@ machine(Directory, "Directory protocol") transition(I, DMA_READ, ID) { //dr_sendDMAData; + r_allocateTbeForDmaRead; qf_queueMemoryFetchRequestDMA; p_popIncomingDMARequestQueue; } @@ -492,6 +497,7 @@ machine(Directory, "Directory protocol") transition(ID, Memory_Data, I) { dr_sendDMAData; //p_popIncomingDMARequestQueue; + w_deallocateTBE; l_popMemQueue; } diff --git a/src/mem/protocol/MI_example-dma.sm b/src/mem/protocol/MI_example-dma.sm index e883288df..0f4894b3a 100644 --- a/src/mem/protocol/MI_example-dma.sm +++ b/src/mem/protocol/MI_example-dma.sm @@ -71,6 +71,7 @@ machine(DMA, "DMA Controller") out_msg.PhysicalAddress := in_msg.PhysicalAddress; out_msg.LineAddress := in_msg.LineAddress; out_msg.Type := DMARequestType:READ; + out_msg.Requestor := machineID; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; out_msg.Destination.add(map_Address_to_Directory(address)); @@ -85,6 +86,7 @@ machine(DMA, "DMA Controller") out_msg.PhysicalAddress := in_msg.PhysicalAddress; out_msg.LineAddress := in_msg.LineAddress; out_msg.Type := DMARequestType:WRITE; + out_msg.Requestor := machineID; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; out_msg.Destination.add(map_Address_to_Directory(address)); diff --git a/src/mem/protocol/MI_example-msg.sm b/src/mem/protocol/MI_example-msg.sm index d4d557200..3cdb74e49 100644 --- a/src/mem/protocol/MI_example-msg.sm +++ b/src/mem/protocol/MI_example-msg.sm @@ -105,6 +105,7 @@ structure(DMARequestMsg, desc="...", interface="NetworkMessage") { DMARequestType Type, desc="Request type (read/write)"; Address PhysicalAddress, desc="Physical address for this request"; Address LineAddress, desc="Line address for this request"; + MachineID Requestor, desc="Node who initiated the request"; NetDest Destination, desc="Destination"; DataBlock DataBlk, desc="DataBlk attached to this request"; int Len, desc="The length of the request"; -- cgit v1.2.3 From e84881b7a353e8a45640f7d53fa032003272656a Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 13:55:58 -0800 Subject: ruby: Removed unused action z_stall --- src/mem/protocol/MI_example-dma.sm | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/MI_example-dma.sm b/src/mem/protocol/MI_example-dma.sm index 0f4894b3a..79c42e719 100644 --- a/src/mem/protocol/MI_example-dma.sm +++ b/src/mem/protocol/MI_example-dma.sm @@ -115,10 +115,6 @@ machine(DMA, "DMA Controller") dmaResponseQueue_in.dequeue(); } - action(z_stall, "z", desc="dma is busy..stall") { - // do nothing - } - transition(READY, ReadRequest, BUSY_RD) { s_sendReadRequest; p_popRequestQueue; -- cgit v1.2.3 From 2783a7b9ad90d04d74418d7463c255c29ffd8046 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:31 -0800 Subject: ruby: returns the number of LLC needed for broadcast Added feature to CacheMemory to return the number of last level caches. This count is need for broadcast protocols such as MOESI_hammer. --- src/mem/protocol/RubySlicc_ComponentMapping.sm | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/RubySlicc_ComponentMapping.sm b/src/mem/protocol/RubySlicc_ComponentMapping.sm index 559e54a8c..2a027554e 100644 --- a/src/mem/protocol/RubySlicc_ComponentMapping.sm +++ b/src/mem/protocol/RubySlicc_ComponentMapping.sm @@ -29,6 +29,8 @@ // Mapping functions +int getNumberOfLastLevelCaches(); + // NodeID map_address_to_node(Address addr); MachineID mapAddressToRange(Address addr, MachineType type, int low, int high); MachineID map_Address_to_DMA(Address addr); -- cgit v1.2.3 From b0973035b4db86052c1d5d67e8695bdaa27130c2 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:31 -0800 Subject: ruby: added the original hammer protocols from old ruby --- src/mem/protocol/MOESI_hammer-cache.sm | 1104 ++++++++++++++++++++++++++++++++ src/mem/protocol/MOESI_hammer-dir.sm | 317 +++++++++ src/mem/protocol/MOESI_hammer-msg.sm | 83 +++ src/mem/protocol/MOESI_hammer.slicc | 5 + 4 files changed, 1509 insertions(+) create mode 100644 src/mem/protocol/MOESI_hammer-cache.sm create mode 100644 src/mem/protocol/MOESI_hammer-dir.sm create mode 100644 src/mem/protocol/MOESI_hammer-msg.sm create mode 100644 src/mem/protocol/MOESI_hammer.slicc (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm new file mode 100644 index 000000000..d244a9b93 --- /dev/null +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -0,0 +1,1104 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +machine(L1Cache, "AMD Hammer-like protocol") { + + // STATES + enumeration(State, desc="Cache states", default="L1Cache_State_I") { + // Base states + I, desc="Idle"; + S, desc="Shared"; + O, desc="Owned"; + M, desc="Modified (dirty)"; + MM, desc="Modified (dirty and locally modified)"; + + // Transient States + IM, "IM", desc="Issued GetX"; + SM, "SM", desc="Issued GetX, we still have an old copy of the line"; + OM, "OM", desc="Issued GetX, received data"; + ISM, "ISM", desc="Issued GetX, received data, waiting for all acks"; + M_W, "M^W", desc="Issued GetS, received exclusive data"; + MM_W, "MM^W", desc="Issued GetX, received exclusive data"; + IS, "IS", desc="Issued GetS"; + SS, "SS", desc="Issued GetS, received data, waiting for all acks"; + OI, "OI", desc="Issued PutO, waiting for ack"; + MI, "MI", desc="Issued PutX, waiting for ack"; + II, "II", desc="Issued PutX/O, saw Other_GETS or Other_GETX, waiting for ack"; + } + + // EVENTS + enumeration(Event, desc="Cache events") { + Load, desc="Load request from the processor"; + Ifetch, desc="I-fetch request from the processor"; + Store, desc="Store request from the processor"; + L2_Replacement, desc="L2 Replacement"; + L1_to_L2, desc="L1 to L2 transfer"; + L2_to_L1D, desc="L2 to L1-Data transfer"; + L2_to_L1I, desc="L2 to L1-Instruction transfer"; + + // Requests + Other_GETX, desc="A GetX from another processor"; + Other_GETS, desc="A GetS from another processor"; + + // Responses + Ack, desc="Received an ack message"; + Shared_Ack, desc="Received an ack message, responder has a shared copy"; + Data, desc="Received a data message"; + Shared_Data, desc="Received a data message, responder has a shared copy"; + Exclusive_Data, desc="Received a data message, responder had an exclusive copy, they gave it to us"; + + Writeback_Ack, desc="Writeback O.K. from directory"; + Writeback_Nack, desc="Writeback not O.K. from directory"; + + // Triggers + All_acks, desc="Received all required data and message acks"; + All_acks_no_sharers, desc="Received all acks and no other processor has a shared copy"; + } + + // TYPES + + // CacheEntry + structure(Entry, desc="...") { + Address Address, desc="Address of this block, required by CacheMemory"; + Time LastRef, desc="Last time this block was referenced, required by CacheMemory"; + AccessPermission Permission, desc="Access permission for this block, required by CacheMemory"; + DataBlock DataBlk, desc="data for the block, required by CacheMemory"; + State CacheState, desc="cache state"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + } + + // TBE fields + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="data for the block, required for concurrent writebacks"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for"; + bool Sharers, desc="On a GetS, did we find any other sharers in the system"; + } + + external_type(NewCacheMemory) { + bool cacheAvail(Address); + Address cacheProbe(Address); + void allocate(Address); + void deallocate(Address); + Entry lookup(Address); + void changePermission(Address, AccessPermission); + bool isTagPresent(Address); + } + + external_type(NewTBETable) { + TBE lookup(Address); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } + + NewTBETable TBEs, template_hack=""; + NewCacheMemory L1IcacheMemory, template_hack="", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,"L1I"'; + NewCacheMemory L1DcacheMemory, template_hack="", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,"L1D"'; + NewCacheMemory L2cacheMemory, template_hack="", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,"L2"'; + + Entry getCacheEntry(Address addr), return_by_ref="yes" { + if (L2cacheMemory.isTagPresent(addr)) { + return L2cacheMemory[addr]; + } else if (L1DcacheMemory.isTagPresent(addr)) { + return L1DcacheMemory[addr]; + } else { + return L1IcacheMemory[addr]; + } + } + + void changePermission(Address addr, AccessPermission permission) { + if (L2cacheMemory.isTagPresent(addr)) { + return L2cacheMemory.changePermission(addr, permission); + } else if (L1DcacheMemory.isTagPresent(addr)) { + return L1DcacheMemory.changePermission(addr, permission); + } else { + return L1IcacheMemory.changePermission(addr, permission); + } + } + + bool isCacheTagPresent(Address addr) { + return (L2cacheMemory.isTagPresent(addr) || L1DcacheMemory.isTagPresent(addr) || L1IcacheMemory.isTagPresent(addr)); + } + + State getState(Address addr) { + assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false); + assert((L1IcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false); + assert((L1DcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false); + + if(TBEs.isPresent(addr)) { + return TBEs[addr].TBEState; + } else if (isCacheTagPresent(addr)) { + return getCacheEntry(addr).CacheState; + } + return State:I; + } + + void setState(Address addr, State state) { + assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false); + assert((L1IcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false); + assert((L1DcacheMemory.isTagPresent(addr) && L2cacheMemory.isTagPresent(addr)) == false); + + if (TBEs.isPresent(addr)) { + TBEs[addr].TBEState := state; + } + + if (isCacheTagPresent(addr)) { + getCacheEntry(addr).CacheState := state; + + // Set permission + if ((state == State:MM) || + (state == State:MM_W)) { + changePermission(addr, AccessPermission:Read_Write); + } else if (state == State:S || + state == State:O || + state == State:M || + state == State:M_W || + state == State:SM || + state == State:ISM || + state == State:OM || + state == State:SS) { + changePermission(addr, AccessPermission:Read_Only); + } else { + changePermission(addr, AccessPermission:Invalid); + } + } + } + + Event mandatory_request_type_to_event(CacheRequestType type) { + if (type == CacheRequestType:LD) { + return Event:Load; + } else if (type == CacheRequestType:IFETCH) { + return Event:Ifetch; + } else if ((type == CacheRequestType:ST) || (type == CacheRequestType:ATOMIC)) { + return Event:Store; + } else { + error("Invalid CacheRequestType"); + } + } + + MessageBuffer triggerQueue, ordered="true"; + + // ** OUT_PORTS ** + + out_port(requestNetwork_out, RequestMsg, requestFromCache); + out_port(responseNetwork_out, ResponseMsg, responseFromCache); + out_port(unblockNetwork_out, ResponseMsg, unblockFromCache); + out_port(triggerQueue_out, TriggerMsg, triggerQueue); + + // ** IN_PORTS ** + + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, triggerQueue) { + if (triggerQueue_in.isReady()) { + peek(triggerQueue_in, TriggerMsg) { + if (in_msg.Type == TriggerType:ALL_ACKS) { + trigger(Event:All_acks, in_msg.Address); + } else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) { + trigger(Event:All_acks_no_sharers, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + + // Nothing from the request network + + // Forward Network + in_port(forwardToCache_in, RequestMsg, forwardToCache) { + if (forwardToCache_in.isReady()) { + peek(forwardToCache_in, RequestMsg) { + if (in_msg.Type == CoherenceRequestType:GETX) { + trigger(Event:Other_GETX, in_msg.Address); + } else if (in_msg.Type == CoherenceRequestType:GETS) { + trigger(Event:Other_GETS, in_msg.Address); + } else if (in_msg.Type == CoherenceRequestType:WB_ACK) { + trigger(Event:Writeback_Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceRequestType:WB_NACK) { + trigger(Event:Writeback_Nack, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + + // Response Network + in_port(responseToCache_in, ResponseMsg, responseToCache) { + if (responseToCache_in.isReady()) { + peek(responseToCache_in, ResponseMsg) { + if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_SHARED) { + trigger(Event:Shared_Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA) { + trigger(Event:Data, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA_SHARED) { + trigger(Event:Shared_Data, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) { + trigger(Event:Exclusive_Data, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + + // Nothing from the unblock network + + // Mandatory Queue + in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") { + if (mandatoryQueue_in.isReady()) { + peek(mandatoryQueue_in, CacheMsg) { + + // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache + + if (in_msg.Type == CacheRequestType:IFETCH) { + // ** INSTRUCTION ACCESS *** + + // Check to see if it is in the OTHER L1 + if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + // The block is in the wrong L1, try to write it to the L2 + if (L2cacheMemory.cacheAvail(in_msg.Address)) { + trigger(Event:L1_to_L2, in_msg.Address); + } else { + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.Address)); + } + } + + if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + } else { + if (L1IcacheMemory.cacheAvail(in_msg.Address)) { + // L1 does't have the line, but we have space for it in the L1 + if (L2cacheMemory.isTagPresent(in_msg.Address)) { + // L2 has it (maybe not with the right permissions) + trigger(Event:L2_to_L1I, in_msg.Address); + } else { + // We have room, the L2 doesn't have it, so the L1 fetches the line + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + } + } else { + // No room in the L1, so we need to make room + if (L2cacheMemory.cacheAvail(L1IcacheMemory.cacheProbe(in_msg.Address))) { + // The L2 has room, so we move the line from the L1 to the L2 + trigger(Event:L1_to_L2, L1IcacheMemory.cacheProbe(in_msg.Address)); + } else { + // The L2 does not have room, so we replace a line from the L2 + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1IcacheMemory.cacheProbe(in_msg.Address))); + } + } + } + } else { + // *** DATA ACCESS *** + + // Check to see if it is in the OTHER L1 + if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + // The block is in the wrong L1, try to write it to the L2 + if (L2cacheMemory.cacheAvail(in_msg.Address)) { + trigger(Event:L1_to_L2, in_msg.Address); + } else { + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.Address)); + } + } + + if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + } else { + if (L1DcacheMemory.cacheAvail(in_msg.Address)) { + // L1 does't have the line, but we have space for it in the L1 + if (L2cacheMemory.isTagPresent(in_msg.Address)) { + // L2 has it (maybe not with the right permissions) + trigger(Event:L2_to_L1D, in_msg.Address); + } else { + // We have room, the L2 doesn't have it, so the L1 fetches the line + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + } + } else { + // No room in the L1, so we need to make room + if (L2cacheMemory.cacheAvail(L1DcacheMemory.cacheProbe(in_msg.Address))) { + // The L2 has room, so we move the line from the L1 to the L2 + trigger(Event:L1_to_L2, L1DcacheMemory.cacheProbe(in_msg.Address)); + } else { + // The L2 does not have room, so we replace a line from the L2 + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1DcacheMemory.cacheProbe(in_msg.Address))); + } + } + } + } + } + } + } + + // ACTIONS + + action(a_issueGETS, "a", desc="Issue GETS") { + enqueue(requestNetwork_out, RequestMsg, latency="ISSUE_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETS; + out_msg.Requestor := id; + out_msg.Destination.add(map_address_to_node(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + TBEs[address].NumPendingMsgs := numberOfNodes(); // One from each other processor (n-1) plus the memory (+1) + } + } + + action(b_issueGETX, "b", desc="Issue GETX") { + enqueue(requestNetwork_out, RequestMsg, latency="ISSUE_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETX; + out_msg.Requestor := id; + out_msg.Destination.add(map_address_to_node(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + TBEs[address].NumPendingMsgs := numberOfNodes(); // One from each other processor (n-1) plus the memory (+1) + } + } + + action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; + out_msg.Sender := id; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := getCacheEntry(address).DataBlk; + out_msg.Dirty := getCacheEntry(address).Dirty; + out_msg.Acks := 2; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(d_issuePUT, "d", desc="Issue PUT") { + enqueue(requestNetwork_out, RequestMsg, latency="CACHE_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:PUT; + out_msg.Requestor := id; + out_msg.Destination.add(map_address_to_node(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + + action(e_sendData, "e", desc="Send data from cache to requestor") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.Sender := id; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := getCacheEntry(address).DataBlk; + out_msg.Dirty := getCacheEntry(address).Dirty; + out_msg.Acks := 2; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(ee_sendDataShared, "\e", desc="Send data from cache to requestor, keep a shared copy") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA_SHARED; + out_msg.Sender := id; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := getCacheEntry(address).DataBlk; + out_msg.Dirty := getCacheEntry(address).Dirty; + out_msg.Acks := 2; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(f_sendAck, "f", desc="Send ack from cache to requestor") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:ACK; + out_msg.Sender := id; + out_msg.Destination.add(in_msg.Requestor); + out_msg.Acks := 1; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + + action(ff_sendAckShared, "\f", desc="Send shared ack from cache to requestor") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:ACK_SHARED; + out_msg.Sender := id; + out_msg.Destination.add(in_msg.Requestor); + out_msg.Acks := 1; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + + action(g_sendUnblock, "g", desc="Send unblock to memory") { + enqueue(unblockNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:UNBLOCK; + out_msg.Sender := id; + out_msg.Destination.add(map_address_to_node(address)); + out_msg.MessageSize := MessageSizeType:Unblock_Control; + } + } + + action(h_load_hit, "h", desc="Notify sequencer the load completed.") { + DEBUG_EXPR(getCacheEntry(address).DataBlk); + sequencer.readCallback(address, getCacheEntry(address).DataBlk); + } + + action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") { + DEBUG_EXPR(getCacheEntry(address).DataBlk); + sequencer.writeCallback(address, getCacheEntry(address).DataBlk); + getCacheEntry(address).Dirty := true; + } + + action(i_allocateTBE, "i", desc="Allocate TBE") { + check_allocate(TBEs); + TBEs.allocate(address); + TBEs[address].DataBlk := getCacheEntry(address).DataBlk; // Data only used for writebacks + TBEs[address].Dirty := getCacheEntry(address).Dirty; + TBEs[address].Sharers := false; + } + + action(j_popTriggerQueue, "j", desc="Pop trigger queue.") { + triggerQueue_in.dequeue(); + } + + action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") { + mandatoryQueue_in.dequeue(); + } + + action(l_popForwardQueue, "l", desc="Pop forwareded request queue.") { + forwardToCache_in.dequeue(); + } + + action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") { + peek(responseToCache_in, ResponseMsg) { + assert(in_msg.Acks > 0); + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - in_msg.Acks; + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + } + } + + action(n_popResponseQueue, "n", desc="Pop response queue") { + responseToCache_in.dequeue(); + } + + action(o_checkForCompletion, "o", desc="Check if we have received all the messages required for completion") { + if (TBEs[address].NumPendingMsgs == 0) { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.Address := address; + if (TBEs[address].Sharers) { + out_msg.Type := TriggerType:ALL_ACKS; + } else { + out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS; + } + } + } + } + + action(p_decrementNumberOfMessagesByOne, "p", desc="Decrement the number of messages for which we're waiting by one") { + TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - 1; + } + + action(pp_incrementNumberOfMessagesByOne, "\p", desc="Increment the number of messages for which we're waiting by one") { + TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs + 1; + } + + action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.Sender := id; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.Dirty := TBEs[address].Dirty; + out_msg.Acks := 2; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(qq_sendDataFromTBEToMemory, "\q", desc="Send data from TBE to memory") { + enqueue(unblockNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + out_msg.Address := address; + out_msg.Sender := id; + out_msg.Destination.add(map_address_to_node(address)); + out_msg.Dirty := TBEs[address].Dirty; + if (TBEs[address].Dirty) { + out_msg.Type := CoherenceResponseType:WB_DIRTY; + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } else { + out_msg.Type := CoherenceResponseType:WB_CLEAN; + // NOTE: in a real system this would not send data. We send + // data here only so we can check it at the memory + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(r_setSharerBit, "r", desc="We saw other sharers") { + TBEs[address].Sharers := true; + } + + action(s_deallocateTBE, "s", desc="Deallocate TBE") { + TBEs.deallocate(address); + } + + action(t_sendExclusiveDataFromTBEToMemory, "t", desc="Send exclusive data from TBE to memory") { + enqueue(unblockNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + out_msg.Address := address; + out_msg.Sender := id; + out_msg.Destination.add(map_address_to_node(address)); + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.Dirty := TBEs[address].Dirty; + if (TBEs[address].Dirty) { + out_msg.Type := CoherenceResponseType:WB_EXCLUSIVE_DIRTY; + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } else { + out_msg.Type := CoherenceResponseType:WB_EXCLUSIVE_CLEAN; + // NOTE: in a real system this would not send data. We send + // data here only so we can check it at the memory + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(u_writeDataToCache, "u", desc="Write data to cache") { + peek(responseToCache_in, ResponseMsg) { + getCacheEntry(address).DataBlk := in_msg.DataBlk; + getCacheEntry(address).Dirty := in_msg.Dirty; + } + } + + action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") { + peek(responseToCache_in, ResponseMsg) { + assert(getCacheEntry(address).DataBlk == in_msg.DataBlk); + getCacheEntry(address).DataBlk := in_msg.DataBlk; + getCacheEntry(address).Dirty := in_msg.Dirty; + } + } + + action(gg_deallocateL1CacheBlock, "\g", desc="Deallocate cache block. Sets the cache to invalid, allowing a replacement in parallel with a fetch.") { + if (L1DcacheMemory.isTagPresent(address)) { + L1DcacheMemory.deallocate(address); + } else { + L1IcacheMemory.deallocate(address); + } + } + + action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") { + if (L1DcacheMemory.isTagPresent(address) == false) { + L1DcacheMemory.allocate(address); + } + } + + action(jj_allocateL1ICacheBlock, "\j", desc="Set L1 I-cache tag equal to tag of block B.") { + if (L1IcacheMemory.isTagPresent(address) == false) { + L1IcacheMemory.allocate(address); + } + } + + action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") { + L2cacheMemory.allocate(address); + } + + action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") { + L2cacheMemory.deallocate(address); + } + + action(ss_copyFromL1toL2, "\s", desc="Copy data block from L1 (I or D) to L2") { + if (L1DcacheMemory.isTagPresent(address)) { + L2cacheMemory[address] := L1DcacheMemory[address]; + } else { + L2cacheMemory[address] := L1IcacheMemory[address]; + } + } + + action(tt_copyFromL2toL1, "\t", desc="Copy data block from L2 to L1 (I or D)") { + if (L1DcacheMemory.isTagPresent(address)) { + L1DcacheMemory[address] := L2cacheMemory[address]; + } else { + L1IcacheMemory[address] := L2cacheMemory[address]; + } + } + + action(uu_profileMiss, "\u", desc="Profile the demand miss") { + peek(mandatoryQueue_in, CacheMsg) { + profile_miss(in_msg, id); + } + } + + action(zz_recycleMandatoryQueue, "\z", desc="Send the head of the mandatory queue to the back of the queue.") { + mandatoryQueue_in.recycle(); + } + + //***************************************************** + // TRANSITIONS + //***************************************************** + + // Transitions for Load/Store/L2_Replacement from transient states + transition({IM, SM, ISM, OM, IS, SS, OI, MI, II}, {Store, L2_Replacement}) { + zz_recycleMandatoryQueue; + } + + transition({M_W, MM_W}, {L2_Replacement}) { + zz_recycleMandatoryQueue; + } + + transition({IM, IS, OI, MI, II}, {Load, Ifetch}) { + zz_recycleMandatoryQueue; + } + + transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II}, L1_to_L2) { + zz_recycleMandatoryQueue; + } + + // Transitions moving data between the L1 and L2 caches + transition({I, S, O, M, MM}, L1_to_L2) { + vv_allocateL2CacheBlock; + ss_copyFromL1toL2; // Not really needed for state I + gg_deallocateL1CacheBlock; + } + + transition({I, S, O, M, MM}, L2_to_L1D) { + ii_allocateL1DCacheBlock; + tt_copyFromL2toL1; // Not really needed for state I + rr_deallocateL2CacheBlock; + } + + transition({I, S, O, M, MM}, L2_to_L1I) { + jj_allocateL1ICacheBlock; + tt_copyFromL2toL1; // Not really needed for state I + rr_deallocateL2CacheBlock; + } + + // Transitions from Idle + transition(I, Load, IS) { + ii_allocateL1DCacheBlock; + i_allocateTBE; + a_issueGETS; + uu_profileMiss; + k_popMandatoryQueue; + } + + transition(I, Ifetch, IS) { + jj_allocateL1ICacheBlock; + i_allocateTBE; + a_issueGETS; + uu_profileMiss; + k_popMandatoryQueue; + } + + transition(I, Store, IM) { + ii_allocateL1DCacheBlock; + i_allocateTBE; + b_issueGETX; + uu_profileMiss; + k_popMandatoryQueue; + } + + transition(I, L2_Replacement) { + rr_deallocateL2CacheBlock; + } + + transition(I, {Other_GETX, Other_GETS}) { + f_sendAck; + l_popForwardQueue; + } + + // Transitions from Shared + transition({S, SM, ISM}, {Load, Ifetch}) { + h_load_hit; + k_popMandatoryQueue; + } + + transition(S, Store, SM) { + i_allocateTBE; + b_issueGETX; + uu_profileMiss; + k_popMandatoryQueue; + } + + transition(S, L2_Replacement, I) { + rr_deallocateL2CacheBlock; + } + + transition(S, Other_GETX, I) { + f_sendAck; + l_popForwardQueue; + } + + transition(S, Other_GETS) { + ff_sendAckShared; + l_popForwardQueue; + } + + // Transitions from Owned + transition({O, OM, SS, MM_W, M_W}, {Load, Ifetch}) { + h_load_hit; + k_popMandatoryQueue; + } + + transition(O, Store, OM) { + i_allocateTBE; + b_issueGETX; + p_decrementNumberOfMessagesByOne; + uu_profileMiss; + k_popMandatoryQueue; + } + + transition(O, L2_Replacement, OI) { + i_allocateTBE; + d_issuePUT; + rr_deallocateL2CacheBlock; + } + + transition(O, Other_GETX, I) { + e_sendData; + l_popForwardQueue; + } + + transition(O, Other_GETS) { + ee_sendDataShared; + l_popForwardQueue; + } + + // Transitions from Modified + transition(MM, {Load, Ifetch}) { + h_load_hit; + k_popMandatoryQueue; + } + + transition(MM, Store) { + hh_store_hit; + k_popMandatoryQueue; + } + + transition(MM, L2_Replacement, MI) { + i_allocateTBE; + d_issuePUT; + rr_deallocateL2CacheBlock; + } + + transition(MM, Other_GETX, I) { + c_sendExclusiveData; + l_popForwardQueue; + } + + transition(MM, Other_GETS, I) { + c_sendExclusiveData; + l_popForwardQueue; + } + + // Transitions from Dirty Exclusive + transition(M, {Load, Ifetch}) { + h_load_hit; + k_popMandatoryQueue; + } + + transition(M, Store, MM) { + hh_store_hit; + k_popMandatoryQueue; + } + + transition(M, L2_Replacement, MI) { + i_allocateTBE; + d_issuePUT; + rr_deallocateL2CacheBlock; + } + + transition(M, Other_GETX, I) { + c_sendExclusiveData; + l_popForwardQueue; + } + + transition(M, Other_GETS, O) { + ee_sendDataShared; + l_popForwardQueue; + } + + // Transitions from IM + + transition(IM, {Other_GETX, Other_GETS}) { + f_sendAck; + l_popForwardQueue; + } + + transition(IM, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(IM, Data, ISM) { + u_writeDataToCache; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(IM, Exclusive_Data, MM_W) { + u_writeDataToCache; + m_decrementNumberOfMessages; + o_checkForCompletion; + hh_store_hit; + n_popResponseQueue; + } + + // Transitions from SM + transition(SM, Other_GETS) { + ff_sendAckShared; + l_popForwardQueue; + } + + transition(SM, Other_GETX, IM) { + f_sendAck; + l_popForwardQueue; + } + + transition(SM, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(SM, Data, ISM) { + v_writeDataToCacheVerify; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + // Transitions from ISM + transition(ISM, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(ISM, All_acks_no_sharers, MM) { + hh_store_hit; + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + // Transitions from OM + + transition(OM, Other_GETX, IM) { + e_sendData; + pp_incrementNumberOfMessagesByOne; + l_popForwardQueue; + } + + transition(OM, Other_GETS) { + ee_sendDataShared; + l_popForwardQueue; + } + + transition(OM, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(OM, {All_acks, All_acks_no_sharers}, MM) { + hh_store_hit; + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + // Transitions from IS + + transition(IS, {Other_GETX, Other_GETS}) { + f_sendAck; + l_popForwardQueue; + } + + transition(IS, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(IS, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(IS, Data, SS) { + u_writeDataToCache; + m_decrementNumberOfMessages; + o_checkForCompletion; + h_load_hit; + n_popResponseQueue; + } + + transition(IS, Exclusive_Data, M_W) { + u_writeDataToCache; + m_decrementNumberOfMessages; + o_checkForCompletion; + h_load_hit; + n_popResponseQueue; + } + + transition(IS, Shared_Data, SS) { + u_writeDataToCache; + r_setSharerBit; + m_decrementNumberOfMessages; + o_checkForCompletion; + h_load_hit; + n_popResponseQueue; + } + + // Transitions from SS + + transition(SS, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(SS, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(SS, All_acks, S) { + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + transition(SS, All_acks_no_sharers, S) { + // Note: The directory might still be the owner, so that is why we go to S + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + // Transitions from MM_W + + transition(MM_W, Store) { + hh_store_hit; + k_popMandatoryQueue; + } + + transition(MM_W, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(MM_W, All_acks_no_sharers, MM) { + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + // Transitions from M_W + + transition(M_W, Store, MM_W) { + hh_store_hit; + k_popMandatoryQueue; + } + + transition(M_W, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(M_W, All_acks_no_sharers, M) { + g_sendUnblock; + s_deallocateTBE; + j_popTriggerQueue; + } + + // Transitions from OI/MI + + transition({OI, MI}, Other_GETX, II) { + q_sendDataFromTBEToCache; + l_popForwardQueue; + } + + transition({OI, MI}, Other_GETS, OI) { + q_sendDataFromTBEToCache; + l_popForwardQueue; + } + + transition(MI, Writeback_Ack, I) { + t_sendExclusiveDataFromTBEToMemory; + s_deallocateTBE; + l_popForwardQueue; + } + + transition(OI, Writeback_Ack, I) { + qq_sendDataFromTBEToMemory; + s_deallocateTBE; + l_popForwardQueue; + } + + // Transitions from II + transition(II, {Other_GETS, Other_GETX}, II) { + f_sendAck; + l_popForwardQueue; + } + + transition(II, Writeback_Ack, I) { + g_sendUnblock; + s_deallocateTBE; + l_popForwardQueue; + } + + transition(II, Writeback_Nack, I) { + s_deallocateTBE; + l_popForwardQueue; + } +} + diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm new file mode 100644 index 000000000..836fa9787 --- /dev/null +++ b/src/mem/protocol/MOESI_hammer-dir.sm @@ -0,0 +1,317 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +machine(Directory, "AMD Hammer-like protocol") { + + // STATES + enumeration(State, desc="Directory states", default="Directory_State_E") { + // Base states + NO, desc="Not Owner"; + O, desc="Owner"; + E, desc="Exclusive Owner (we can provide the data in exclusive)"; + NO_B, "NO^B", desc="Not Owner, Blocked"; + O_B, "O^B", desc="Owner, Blocked"; + WB, desc="Blocked on a writeback"; + } + + // Events + enumeration(Event, desc="Directory events") { + GETX, desc="A GETX arrives"; + GETS, desc="A GETS arrives"; + PUT, desc="A PUT arrives"; + Unblock, desc="An unblock message arrives"; + Writeback_Clean, desc="The final part of a PutX (no data)"; + Writeback_Dirty, desc="The final part of a PutX (data)"; + Writeback_Exclusive_Clean, desc="The final part of a PutX (no data, exclusive)"; + Writeback_Exclusive_Dirty, desc="The final part of a PutX (data, exclusive)"; + } + + // TYPES + + // DirectoryEntry + structure(Entry, desc="...") { + State DirectoryState, desc="Directory state"; + DataBlock DataBlk, desc="data for the block"; + } + + external_type(DirectoryMemory) { + Entry lookup(Address); + bool isPresent(Address); + } + + // ** OBJECTS ** + + DirectoryMemory directory; + + State getState(Address addr) { + return directory[addr].DirectoryState; + } + + void setState(Address addr, State state) { + directory[addr].DirectoryState := state; + } + + // ** OUT_PORTS ** + out_port(forwardNetwork_out, RequestMsg, forwardFromDir); + out_port(responseNetwork_out, ResponseMsg, responseFromDir); + out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests + + // ** IN_PORTS ** + + in_port(unblockNetwork_in, ResponseMsg, unblockToDir) { + if (unblockNetwork_in.isReady()) { + peek(unblockNetwork_in, ResponseMsg) { + if (in_msg.Type == CoherenceResponseType:UNBLOCK) { + trigger(Event:Unblock, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:WB_CLEAN) { + trigger(Event:Writeback_Clean, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:WB_DIRTY) { + trigger(Event:Writeback_Dirty, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:WB_EXCLUSIVE_CLEAN) { + trigger(Event:Writeback_Exclusive_Clean, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:WB_EXCLUSIVE_DIRTY) { + trigger(Event:Writeback_Exclusive_Dirty, in_msg.Address); + } else { + error("Invalid message"); + } + } + } + } + + in_port(requestQueue_in, RequestMsg, requestToDir) { + if (requestQueue_in.isReady()) { + peek(requestQueue_in, RequestMsg) { + if (in_msg.Type == CoherenceRequestType:GETS) { + trigger(Event:GETS, in_msg.Address); + } else if (in_msg.Type == CoherenceRequestType:GETX) { + trigger(Event:GETX, in_msg.Address); + } else if (in_msg.Type == CoherenceRequestType:PUT) { + trigger(Event:PUT, in_msg.Address); + } else { + error("Invalid message"); + } + } + } + } + + // Actions + + action(a_sendWriteBackAck, "a", desc="Send writeback ack to requestor") { + peek(requestQueue_in, RequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:WB_ACK; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") { + peek(requestQueue_in, RequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:WB_NACK; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(d_sendData, "d", desc="Send data to requestor") { + peek(requestQueue_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.Sender := id; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := directory[in_msg.Address].DataBlk; + out_msg.Dirty := false; // By definition, the block is now clean + out_msg.Acks := 1; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(dd_sendExclusiveData, "\d", desc="Send exclusive data to requestor") { + peek(requestQueue_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; + out_msg.Sender := id; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := directory[in_msg.Address].DataBlk; + out_msg.Dirty := false; // By definition, the block is now clean + out_msg.Acks := 1; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(f_forwardRequest, "f", desc="Forward requests") { + if (numberOfNodes() > 1) { + peek(requestQueue_in, RequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") { + out_msg.Address := address; + out_msg.Type := in_msg.Type; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.broadcast(); // Send to everyone, but... + out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + } + } + } + } + + action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") { + requestQueue_in.dequeue(); + } + + action(j_popIncomingUnblockQueue, "j", desc="Pop incoming unblock queue") { + unblockNetwork_in.dequeue(); + } + + action(l_writeDataToMemory, "l", desc="Write PUTX/PUTO data to memory") { + peek(unblockNetwork_in, ResponseMsg) { + assert(in_msg.Dirty); + assert(in_msg.MessageSize == MessageSizeType:Writeback_Data); + directory[in_msg.Address].DataBlk := in_msg.DataBlk; + DEBUG_EXPR(in_msg.Address); + DEBUG_EXPR(in_msg.DataBlk); + } + } + + action(ll_checkIncomingWriteback, "\l", desc="Check PUTX/PUTO response message") { + peek(unblockNetwork_in, ResponseMsg) { + assert(in_msg.Dirty == false); + assert(in_msg.MessageSize == MessageSizeType:Writeback_Control); + + // NOTE: The following check would not be valid in a real + // implementation. We include the data in the "dataless" + // message so we can assert the clean data matches the datablock + // in memory + assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk); + } + } + + // action(z_stall, "z", desc="Cannot be handled right now.") { + // Special name recognized as do nothing case + // } + + action(zz_recycleRequest, "\z", desc="Recycle the request queue") { + requestQueue_in.recycle(); + } + + // TRANSITIONS + + transition(E, GETX, NO_B) { + dd_sendExclusiveData; + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(E, GETS, NO_B) { + dd_sendExclusiveData; + f_forwardRequest; + i_popIncomingRequestQueue; + } + + // + transition(O, GETX, NO_B) { + d_sendData; + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(O, GETS, O_B) { + d_sendData; + f_forwardRequest; + i_popIncomingRequestQueue; + } + + // + transition(NO, GETX, NO_B) { + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(NO, GETS, NO_B) { + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(NO, PUT, WB) { + a_sendWriteBackAck; + i_popIncomingRequestQueue; + } + + transition({O, E}, PUT) { + b_sendWriteBackNack; + i_popIncomingRequestQueue; + } + + // Blocked states + transition({NO_B, O_B, WB}, {GETS, GETX, PUT}) { + zz_recycleRequest; + } + + transition(NO_B, Unblock, NO) { + j_popIncomingUnblockQueue; + } + + transition(O_B, Unblock, O) { + j_popIncomingUnblockQueue; + } + + // WB + transition(WB, Writeback_Dirty, O) { + l_writeDataToMemory; + j_popIncomingUnblockQueue; + } + + transition(WB, Writeback_Exclusive_Dirty, E) { + l_writeDataToMemory; + j_popIncomingUnblockQueue; + } + + transition(WB, Writeback_Clean, O) { + ll_checkIncomingWriteback; + j_popIncomingUnblockQueue; + } + + transition(WB, Writeback_Exclusive_Clean, E) { + ll_checkIncomingWriteback; + j_popIncomingUnblockQueue; + } + + transition(WB, Unblock, NO) { + j_popIncomingUnblockQueue; + } +} diff --git a/src/mem/protocol/MOESI_hammer-msg.sm b/src/mem/protocol/MOESI_hammer-msg.sm new file mode 100644 index 000000000..43e00a2d1 --- /dev/null +++ b/src/mem/protocol/MOESI_hammer-msg.sm @@ -0,0 +1,83 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// CoherenceRequestType +enumeration(CoherenceRequestType, desc="...") { + GETX, desc="Get eXclusive"; + GETS, desc="Get Shared"; + PUT, desc="Put Ownership"; + WB_ACK, desc="Writeback ack"; + WB_NACK, desc="Writeback neg. ack"; +} + +// CoherenceResponseType +enumeration(CoherenceResponseType, desc="...") { + ACK, desc="ACKnowledgment, responder does not have a copy"; + ACK_SHARED, desc="ACKnowledgment, responder has a shared copy"; + DATA, desc="Data, responder does not have a copy"; + DATA_SHARED, desc="Data, responder has a shared copy"; + DATA_EXCLUSIVE, desc="Data, responder was exclusive, gave us a copy, and they went to invalid"; + WB_CLEAN, desc="Clean writeback"; + WB_DIRTY, desc="Dirty writeback"; + WB_EXCLUSIVE_CLEAN, desc="Clean writeback of exclusive data"; + WB_EXCLUSIVE_DIRTY, desc="Dirty writeback of exclusive data"; + UNBLOCK, desc="Unblock"; +} + +// TriggerType +enumeration(TriggerType, desc="...") { + ALL_ACKS, desc="See corresponding event"; + ALL_ACKS_NO_SHARERS, desc="See corresponding event"; +} + +// TriggerMsg +structure(TriggerMsg, desc="...", interface="Message") { + Address Address, desc="Physical address for this request"; + TriggerType Type, desc="Type of trigger"; +} + +// RequestMsg (and also forwarded requests) +structure(RequestMsg, desc="...", interface="NetworkMessage") { + Address Address, desc="Physical address for this request"; + CoherenceRequestType Type, desc="Type of request (GetS, GetX, PutX, etc)"; + NodeID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Multicast destination mask"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +// ResponseMsg (and also unblock requests) +structure(ResponseMsg, desc="...", interface="NetworkMessage") { + Address Address, desc="Physical address for this request"; + CoherenceResponseType Type, desc="Type of response (Ack, Data, etc)"; + NodeID Sender, desc="Node who sent the data"; + NetDest Destination, desc="Node to whom the data is sent"; + DataBlock DataBlk, desc="data for the cache line"; + bool Dirty, desc="Is the data dirty (different than memory)?"; + int Acks, desc="How many messages this counts as"; + MessageSizeType MessageSize, desc="size category of the message"; +} diff --git a/src/mem/protocol/MOESI_hammer.slicc b/src/mem/protocol/MOESI_hammer.slicc new file mode 100644 index 000000000..8ff931e04 --- /dev/null +++ b/src/mem/protocol/MOESI_hammer.slicc @@ -0,0 +1,5 @@ +../protocols/MOESI_hammer-msg.sm +../protocols/hammer-ni.sm +../protocols/MOESI_hammer-cache.sm +../protocols/MOESI_hammer-dir.sm +../protocols/standard_1level-node.sm -- cgit v1.2.3 From 877be2009c0871effb3494b52ce5221d58a594d6 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:32 -0800 Subject: ruby: Changes necessary to get the hammer protocol to work in GEM5 --- src/mem/protocol/MOESI_hammer-cache.sm | 178 +++++++++++++++++++-------------- src/mem/protocol/MOESI_hammer-dir.sm | 38 +++++-- src/mem/protocol/MOESI_hammer-msg.sm | 7 +- src/mem/protocol/MOESI_hammer.slicc | 9 +- src/mem/protocol/SConsopts | 1 + 5 files changed, 141 insertions(+), 92 deletions(-) (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index d244a9b93..6fb5091af 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -1,5 +1,6 @@ /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 2009 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,9 +25,27 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * AMD's contributions to the MOESI hammer protocol do not constitute an + * endorsement of its similarity to any AMD products. + * + * Authors: Milo Martin + * Brad Beckmann */ -machine(L1Cache, "AMD Hammer-like protocol") { +machine(L1Cache, "AMD Hammer-like protocol") +: int cache_response_latency, + int issue_latency +{ + + // NETWORK BUFFERS + MessageBuffer requestFromCache, network="To", virtual_network="3", ordered="false"; + MessageBuffer responseFromCache, network="To", virtual_network="1", ordered="false"; + MessageBuffer unblockFromCache, network="To", virtual_network="0", ordered="false"; + + MessageBuffer forwardToCache, network="From", virtual_network="2", ordered="false"; + MessageBuffer responseToCache, network="From", virtual_network="1", ordered="false"; + // STATES enumeration(State, desc="Cache states", default="L1Cache_State_I") { @@ -82,14 +101,16 @@ machine(L1Cache, "AMD Hammer-like protocol") { // TYPES + // STRUCTURE DEFINITIONS + + MessageBuffer mandatoryQueue, ordered="false"; + Sequencer sequencer, factory='RubySystem::getSequencer(m_cfg["sequencer"])'; + // CacheEntry - structure(Entry, desc="...") { - Address Address, desc="Address of this block, required by CacheMemory"; - Time LastRef, desc="Last time this block was referenced, required by CacheMemory"; - AccessPermission Permission, desc="Access permission for this block, required by CacheMemory"; - DataBlock DataBlk, desc="data for the block, required by CacheMemory"; + structure(Entry, desc="...", interface="AbstractCacheEntry") { State CacheState, desc="cache state"; bool Dirty, desc="Is the data dirty (different than memory)?"; + DataBlock DataBlk, desc="data for the block"; } // TBE fields @@ -101,27 +122,28 @@ machine(L1Cache, "AMD Hammer-like protocol") { bool Sharers, desc="On a GetS, did we find any other sharers in the system"; } - external_type(NewCacheMemory) { + external_type(CacheMemory) { bool cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); bool isTagPresent(Address); + void profileMiss(CacheMsg); } - external_type(NewTBETable) { + external_type(TBETable) { TBE lookup(Address); void allocate(Address); void deallocate(Address); bool isPresent(Address); } - NewTBETable TBEs, template_hack=""; - NewCacheMemory L1IcacheMemory, template_hack="", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,"L1I"'; - NewCacheMemory L1DcacheMemory, template_hack="", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,"L1D"'; - NewCacheMemory L2cacheMemory, template_hack="", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,"L2"'; + TBETable TBEs, template_hack=""; + CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])'; + CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])'; + CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])'; Entry getCacheEntry(Address addr), return_by_ref="yes" { if (L2cacheMemory.isTagPresent(addr)) { @@ -284,36 +306,36 @@ machine(L1Cache, "AMD Hammer-like protocol") { // ** INSTRUCTION ACCESS *** // Check to see if it is in the OTHER L1 - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.Address)) { - trigger(Event:L1_to_L2, in_msg.Address); + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress); } else { - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.LineAddress)); } } - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1IcacheMemory.cacheAvail(in_msg.Address)) { + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - if (L2cacheMemory.isTagPresent(in_msg.Address)) { + if (L2cacheMemory.isTagPresent(in_msg.LineAddress)) { // L2 has it (maybe not with the right permissions) - trigger(Event:L2_to_L1I, in_msg.Address); + trigger(Event:L2_to_L1I, in_msg.LineAddress); } else { // We have room, the L2 doesn't have it, so the L1 fetches the line - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } } else { // No room in the L1, so we need to make room - if (L2cacheMemory.cacheAvail(L1IcacheMemory.cacheProbe(in_msg.Address))) { + if (L2cacheMemory.cacheAvail(L1IcacheMemory.cacheProbe(in_msg.LineAddress))) { // The L2 has room, so we move the line from the L1 to the L2 - trigger(Event:L1_to_L2, L1IcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_to_L2, L1IcacheMemory.cacheProbe(in_msg.LineAddress)); } else { // The L2 does not have room, so we replace a line from the L2 - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1IcacheMemory.cacheProbe(in_msg.Address))); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1IcacheMemory.cacheProbe(in_msg.LineAddress))); } } } @@ -321,36 +343,36 @@ machine(L1Cache, "AMD Hammer-like protocol") { // *** DATA ACCESS *** // Check to see if it is in the OTHER L1 - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.Address)) { - trigger(Event:L1_to_L2, in_msg.Address); + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress); } else { - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(in_msg.LineAddress)); } } - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1DcacheMemory.cacheAvail(in_msg.Address)) { + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - if (L2cacheMemory.isTagPresent(in_msg.Address)) { + if (L2cacheMemory.isTagPresent(in_msg.LineAddress)) { // L2 has it (maybe not with the right permissions) - trigger(Event:L2_to_L1D, in_msg.Address); + trigger(Event:L2_to_L1D, in_msg.LineAddress); } else { // We have room, the L2 doesn't have it, so the L1 fetches the line - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } } else { // No room in the L1, so we need to make room - if (L2cacheMemory.cacheAvail(L1DcacheMemory.cacheProbe(in_msg.Address))) { + if (L2cacheMemory.cacheAvail(L1DcacheMemory.cacheProbe(in_msg.LineAddress))) { // The L2 has room, so we move the line from the L1 to the L2 - trigger(Event:L1_to_L2, L1DcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_to_L2, L1DcacheMemory.cacheProbe(in_msg.LineAddress)); } else { // The L2 does not have room, so we replace a line from the L2 - trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1DcacheMemory.cacheProbe(in_msg.Address))); + trigger(Event:L2_Replacement, L2cacheMemory.cacheProbe(L1DcacheMemory.cacheProbe(in_msg.LineAddress))); } } } @@ -362,33 +384,33 @@ machine(L1Cache, "AMD Hammer-like protocol") { // ACTIONS action(a_issueGETS, "a", desc="Issue GETS") { - enqueue(requestNetwork_out, RequestMsg, latency="ISSUE_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; - out_msg.Requestor := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Request_Control; - TBEs[address].NumPendingMsgs := numberOfNodes(); // One from each other processor (n-1) plus the memory (+1) + TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); // One from each other cache (n-1) plus the memory (+1) } } action(b_issueGETX, "b", desc="Issue GETX") { - enqueue(requestNetwork_out, RequestMsg, latency="ISSUE_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETX; - out_msg.Requestor := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Request_Control; - TBEs[address].NumPendingMsgs := numberOfNodes(); // One from each other processor (n-1) plus the memory (+1) + TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); // One from each other cache (n-1) plus the memory (+1) } } action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -399,21 +421,21 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(d_issuePUT, "d", desc="Issue PUT") { - enqueue(requestNetwork_out, RequestMsg, latency="CACHE_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:PUT; - out_msg.Requestor := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } action(e_sendData, "e", desc="Send data from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -425,10 +447,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(ee_sendDataShared, "\e", desc="Send data from cache to requestor, keep a shared copy") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -440,10 +462,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(f_sendAck, "f", desc="Send ack from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.Acks := 1; out_msg.MessageSize := MessageSizeType:Response_Control; @@ -453,10 +475,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(ff_sendAckShared, "\f", desc="Send shared ack from cache to requestor") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK_SHARED; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.Acks := 1; out_msg.MessageSize := MessageSizeType:Response_Control; @@ -465,11 +487,11 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(g_sendUnblock, "g", desc="Send unblock to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:UNBLOCK; - out_msg.Sender := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Unblock_Control; } } @@ -541,10 +563,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") { peek(forwardToCache_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := TBEs[address].DataBlk; out_msg.Dirty := TBEs[address].Dirty; @@ -555,10 +577,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(qq_sendDataFromTBEToMemory, "\q", desc="Send data from TBE to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; - out_msg.Sender := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.Dirty := TBEs[address].Dirty; if (TBEs[address].Dirty) { out_msg.Type := CoherenceResponseType:WB_DIRTY; @@ -583,10 +605,10 @@ machine(L1Cache, "AMD Hammer-like protocol") { } action(t_sendExclusiveDataFromTBEToMemory, "t", desc="Send exclusive data from TBE to memory") { - enqueue(unblockNetwork_out, ResponseMsg, latency="CACHE_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) { out_msg.Address := address; - out_msg.Sender := id; - out_msg.Destination.add(map_address_to_node(address)); + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.DataBlk := TBEs[address].DataBlk; out_msg.Dirty := TBEs[address].Dirty; if (TBEs[address].Dirty) { @@ -628,18 +650,18 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") { if (L1DcacheMemory.isTagPresent(address) == false) { - L1DcacheMemory.allocate(address); + L1DcacheMemory.allocate(address, new Entry); } } action(jj_allocateL1ICacheBlock, "\j", desc="Set L1 I-cache tag equal to tag of block B.") { if (L1IcacheMemory.isTagPresent(address) == false) { - L1IcacheMemory.allocate(address); + L1IcacheMemory.allocate(address, new Entry); } } action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") { - L2cacheMemory.allocate(address); + L2cacheMemory.allocate(address, new Entry); } action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") { @@ -664,7 +686,13 @@ machine(L1Cache, "AMD Hammer-like protocol") { action(uu_profileMiss, "\u", desc="Profile the demand miss") { peek(mandatoryQueue_in, CacheMsg) { - profile_miss(in_msg, id); + if (L1IcacheMemory.isTagPresent(address)) { + L1IcacheMemory.profileMiss(in_msg); + } else if (L1DcacheMemory.isTagPresent(address)) { + L1DcacheMemory.profileMiss(in_msg); + } else { + L2cacheMemory.profileMiss(in_msg); + } } } diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm index 836fa9787..0f7c58acd 100644 --- a/src/mem/protocol/MOESI_hammer-dir.sm +++ b/src/mem/protocol/MOESI_hammer-dir.sm @@ -1,5 +1,6 @@ /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 2009 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,9 +25,26 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * AMD's contributions to the MOESI hammer protocol do not constitute an + * endorsement of its similarity to any AMD products. + * + * Authors: Milo Martin + * Brad Beckmann */ -machine(Directory, "AMD Hammer-like protocol") { +machine(Directory, "AMD Hammer-like protocol") +: int memory_controller_latency, + int memory_latency +{ + + MessageBuffer forwardFromDir, network="To", virtual_network="2", ordered="false"; + MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; +// MessageBuffer dmaRequestFromDir, network="To", virtual_network="4", ordered="true"; + + MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; + MessageBuffer unblockToDir, network="From", virtual_network="0", ordered="false"; +// MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; // STATES enumeration(State, desc="Directory states", default="Directory_State_E") { @@ -66,7 +84,7 @@ machine(Directory, "AMD Hammer-like protocol") { // ** OBJECTS ** - DirectoryMemory directory; + DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])'; State getState(Address addr) { return directory[addr].DirectoryState; @@ -123,7 +141,7 @@ machine(Directory, "AMD Hammer-like protocol") { action(a_sendWriteBackAck, "a", desc="Send writeback ack to requestor") { peek(requestQueue_in, RequestMsg) { - enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:WB_ACK; out_msg.Requestor := in_msg.Requestor; @@ -135,7 +153,7 @@ machine(Directory, "AMD Hammer-like protocol") { action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") { peek(requestQueue_in, RequestMsg) { - enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:WB_NACK; out_msg.Requestor := in_msg.Requestor; @@ -147,10 +165,10 @@ machine(Directory, "AMD Hammer-like protocol") { action(d_sendData, "d", desc="Send data to requestor") { peek(requestQueue_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := directory[in_msg.Address].DataBlk; out_msg.Dirty := false; // By definition, the block is now clean @@ -162,10 +180,10 @@ machine(Directory, "AMD Hammer-like protocol") { action(dd_sendExclusiveData, "\d", desc="Send exclusive data to requestor") { peek(requestQueue_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; - out_msg.Sender := id; + out_msg.Sender := machineID; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := directory[in_msg.Address].DataBlk; out_msg.Dirty := false; // By definition, the block is now clean @@ -176,9 +194,9 @@ machine(Directory, "AMD Hammer-like protocol") { } action(f_forwardRequest, "f", desc="Forward requests") { - if (numberOfNodes() > 1) { + if (getNumberOfLastLevelCaches() > 1) { peek(requestQueue_in, RequestMsg) { - enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; diff --git a/src/mem/protocol/MOESI_hammer-msg.sm b/src/mem/protocol/MOESI_hammer-msg.sm index 43e00a2d1..b4da617cc 100644 --- a/src/mem/protocol/MOESI_hammer-msg.sm +++ b/src/mem/protocol/MOESI_hammer-msg.sm @@ -24,6 +24,9 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * AMD's contributions to the MOESI hammer protocol do not constitute an + * endorsement of its similarity to any AMD products. */ // CoherenceRequestType @@ -65,7 +68,7 @@ structure(TriggerMsg, desc="...", interface="Message") { structure(RequestMsg, desc="...", interface="NetworkMessage") { Address Address, desc="Physical address for this request"; CoherenceRequestType Type, desc="Type of request (GetS, GetX, PutX, etc)"; - NodeID Requestor, desc="Node who initiated the request"; + MachineID Requestor, desc="Node who initiated the request"; NetDest Destination, desc="Multicast destination mask"; MessageSizeType MessageSize, desc="size category of the message"; } @@ -74,7 +77,7 @@ structure(RequestMsg, desc="...", interface="NetworkMessage") { structure(ResponseMsg, desc="...", interface="NetworkMessage") { Address Address, desc="Physical address for this request"; CoherenceResponseType Type, desc="Type of response (Ack, Data, etc)"; - NodeID Sender, desc="Node who sent the data"; + MachineID Sender, desc="Node who sent the data"; NetDest Destination, desc="Node to whom the data is sent"; DataBlock DataBlk, desc="data for the cache line"; bool Dirty, desc="Is the data dirty (different than memory)?"; diff --git a/src/mem/protocol/MOESI_hammer.slicc b/src/mem/protocol/MOESI_hammer.slicc index 8ff931e04..d49350e32 100644 --- a/src/mem/protocol/MOESI_hammer.slicc +++ b/src/mem/protocol/MOESI_hammer.slicc @@ -1,5 +1,4 @@ -../protocols/MOESI_hammer-msg.sm -../protocols/hammer-ni.sm -../protocols/MOESI_hammer-cache.sm -../protocols/MOESI_hammer-dir.sm -../protocols/standard_1level-node.sm +MOESI_hammer-msg.sm +MOESI_hammer-cache.sm +MOESI_hammer-dir.sm +standard_1level_CMP-protocol.sm diff --git a/src/mem/protocol/SConsopts b/src/mem/protocol/SConsopts index ded0814d2..7be9fd97e 100644 --- a/src/mem/protocol/SConsopts +++ b/src/mem/protocol/SConsopts @@ -47,6 +47,7 @@ all_protocols = [ 'MOSI_SMP_bcast_m', 'MOSI_SMP_directory_1level', 'MSI_MOSI_CMP_directory', + 'MOESI_hammer', ] opt = EnumVariable('PROTOCOL', 'Coherence Protocol for Ruby', 'MI_example', -- cgit v1.2.3 From bc12b8432d9c576d62f6bdaabb7d78c7703a2e34 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:32 -0800 Subject: ruby: Hammer ruby configuration support --- src/mem/protocol/MOESI_hammer-cache.sm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index 6fb5091af..3b2240800 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -143,7 +143,7 @@ machine(L1Cache, "AMD Hammer-like protocol") TBETable TBEs, template_hack=""; CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])'; CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])'; - CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])'; + CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["l2cache"])'; Entry getCacheEntry(Address addr), return_by_ref="yes" { if (L2cacheMemory.isTagPresent(addr)) { -- cgit v1.2.3 From dbb2c111cccacad4e331bfded3b316e3c78dc63c Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:32 -0800 Subject: ruby: Added a memory controller feature to MOESI hammer --- src/mem/protocol/MOESI_hammer-dir.sm | 250 +++++++++++++++++++++++++++++++---- src/mem/protocol/MOESI_hammer-msg.sm | 1 + 2 files changed, 222 insertions(+), 29 deletions(-) (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm index 0f7c58acd..49efaffb6 100644 --- a/src/mem/protocol/MOESI_hammer-dir.sm +++ b/src/mem/protocol/MOESI_hammer-dir.sm @@ -34,17 +34,16 @@ */ machine(Directory, "AMD Hammer-like protocol") -: int memory_controller_latency, - int memory_latency +: int memory_controller_latency { MessageBuffer forwardFromDir, network="To", virtual_network="2", ordered="false"; MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; -// MessageBuffer dmaRequestFromDir, network="To", virtual_network="4", ordered="true"; + //MessageBuffer dmaRequestFromDir, network="To", virtual_network="4", ordered="true"; MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; MessageBuffer unblockToDir, network="From", virtual_network="0", ordered="false"; -// MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; + //MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; // STATES enumeration(State, desc="Directory states", default="Directory_State_E") { @@ -54,7 +53,13 @@ machine(Directory, "AMD Hammer-like protocol") E, desc="Exclusive Owner (we can provide the data in exclusive)"; NO_B, "NO^B", desc="Not Owner, Blocked"; O_B, "O^B", desc="Owner, Blocked"; + NO_B_W, desc="Not Owner, Blocked, waiting for Dram"; + O_B_W, desc="Owner, Blocked, waiting for Dram"; + NO_W, desc="Not Owner, waiting for Dram"; + O_W, desc="Owner, waiting for Dram"; WB, desc="Blocked on a writeback"; + WB_O_W, desc="Blocked on memory write, will go to O"; + WB_E_W, desc="Blocked on memory write, will go to E"; } // Events @@ -67,6 +72,10 @@ machine(Directory, "AMD Hammer-like protocol") Writeback_Dirty, desc="The final part of a PutX (data)"; Writeback_Exclusive_Clean, desc="The final part of a PutX (no data, exclusive)"; Writeback_Exclusive_Dirty, desc="The final part of a PutX (data, exclusive)"; + + // Memory Controller + Memory_Data, desc="Fetched data from memory arrives"; + Memory_Ack, desc="Writeback Ack from memory arrives"; } // TYPES @@ -82,15 +91,47 @@ machine(Directory, "AMD Hammer-like protocol") bool isPresent(Address); } + external_type(MemoryControl, inport="yes", outport="yes") { + + } + + // TBE entries for DMA requests + structure(TBE, desc="TBE entries for outstanding DMA requests") { + Address PhysicalAddress, desc="physical address"; + State TBEState, desc="Transient State"; + CoherenceResponseType ResponseType, desc="The type for the subsequent response message"; + DataBlock DataBlk, desc="Data to be written (DMA write only)"; + int Len, desc="..."; + MachineID DmaRequestor, desc="DMA requestor"; + } + + external_type(TBETable) { + TBE lookup(Address); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } + // ** OBJECTS ** DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])'; + MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])'; + + TBETable TBEs, template_hack=""; + State getState(Address addr) { - return directory[addr].DirectoryState; + if (TBEs.isPresent(addr)) { + return TBEs[addr].TBEState; + } else { + return directory[addr].DirectoryState; + } } void setState(Address addr, State state) { + if (TBEs.isPresent(addr)) { + TBEs[addr].TBEState := state; + } directory[addr].DirectoryState := state; } @@ -99,6 +140,11 @@ machine(Directory, "AMD Hammer-like protocol") out_port(responseNetwork_out, ResponseMsg, responseFromDir); out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests + // + // Memory buffer for memory controller to DIMM communication + // + out_port(memQueue_out, MemoryMsg, memBuffer); + // ** IN_PORTS ** in_port(unblockNetwork_in, ResponseMsg, unblockToDir) { @@ -137,6 +183,22 @@ machine(Directory, "AMD Hammer-like protocol") } } + // off-chip memory request/response is done + in_port(memQueue_in, MemoryMsg, memBuffer) { + if (memQueue_in.isReady()) { + peek(memQueue_in, MemoryMsg) { + if (in_msg.Type == MemoryRequestType:MEMORY_READ) { + trigger(Event:Memory_Data, in_msg.Address); + } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) { + trigger(Event:Memory_Ack, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } + } + } + // Actions action(a_sendWriteBackAck, "a", desc="Send writeback ack to requestor") { @@ -163,14 +225,26 @@ machine(Directory, "AMD Hammer-like protocol") } } - action(d_sendData, "d", desc="Send data to requestor") { + action(v_allocateTBE, "v", desc="Allocate TBE") { peek(requestQueue_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { + TBEs.allocate(address); + TBEs[address].PhysicalAddress := address; + TBEs[address].ResponseType := CoherenceResponseType:NULL; + } + } + + action(w_deallocateTBE, "w", desc="Deallocate TBE") { + TBEs.deallocate(address); + } + + action(d_sendData, "d", desc="Send data to requestor") { + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; - out_msg.Type := CoherenceResponseType:DATA; + out_msg.Type := TBEs[address].ResponseType; out_msg.Sender := machineID; - out_msg.Destination.add(in_msg.Requestor); - out_msg.DataBlk := directory[in_msg.Address].DataBlk; + out_msg.Destination.add(in_msg.OriginalRequestorMachId); + out_msg.DataBlk := in_msg.DataBlk; out_msg.Dirty := false; // By definition, the block is now clean out_msg.Acks := 1; out_msg.MessageSize := MessageSizeType:Response_Data; @@ -178,21 +252,77 @@ machine(Directory, "AMD Hammer-like protocol") } } - action(dd_sendExclusiveData, "\d", desc="Send exclusive data to requestor") { + action(rx_recordExclusiveInTBE, "rx", desc="Record Exclusive in TBE") { peek(requestQueue_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { + TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; + } + } + + action(r_recordDataInTBE, "r", desc="Record Data in TBE") { + peek(requestQueue_in, RequestMsg) { + TBEs[address].ResponseType := CoherenceResponseType:DATA; + } + } + + action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { + peek(requestQueue_in, RequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { out_msg.Address := address; - out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; + out_msg.Type := MemoryRequestType:MEMORY_READ; out_msg.Sender := machineID; - out_msg.Destination.add(in_msg.Requestor); + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; out_msg.DataBlk := directory[in_msg.Address].DataBlk; - out_msg.Dirty := false; // By definition, the block is now clean - out_msg.Acks := 1; - out_msg.MessageSize := MessageSizeType:Response_Data; + DEBUG_EXPR(out_msg); } } } +// action(qx_queueMemoryFetchExclusiveRequest, "xf", desc="Queue off-chip fetch request") { +// peek(requestQueue_in, RequestMsg) { +// enqueue(memQueue_out, MemoryMsg, latency=memory_request_latency) { +// out_msg.Address := address; +// out_msg.Type := MemoryRequestType:MEMORY_READ; +// out_msg.ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; +// out_msg.Sender := machineID; +// out_msg.OriginalRequestorMachId := in_msg.Requestor; +// out_msg.MessageSize := in_msg.MessageSize; +// out_msg.DataBlk := directory[in_msg.Address].DataBlk; +// DEBUG_EXPR(out_msg); +// } +// } +// } + +// action(d_sendData, "d", desc="Send data to requestor") { +// peek(requestQueue_in, RequestMsg) { +// enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { +// out_msg.Address := address; +// out_msg.Type := CoherenceResponseType:DATA; +// out_msg.Sender := machineID; +// out_msg.Destination.add(in_msg.Requestor); +// out_msg.DataBlk := directory[in_msg.Address].DataBlk; +// out_msg.Dirty := false; // By definition, the block is now clean +// out_msg.Acks := 1; +// out_msg.MessageSize := MessageSizeType:Response_Data; +// } +// } +// } + +// action(dd_sendExclusiveData, "\d", desc="Send exclusive data to requestor") { +// peek(requestQueue_in, RequestMsg) { +// enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { +// out_msg.Address := address; +// out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; +// out_msg.Sender := machineID; +// out_msg.Destination.add(in_msg.Requestor); +// out_msg.DataBlk := directory[in_msg.Address].DataBlk; +// out_msg.Dirty := false; // By definition, the block is now clean +// out_msg.Acks := 1; +// out_msg.MessageSize := MessageSizeType:Response_Data; +// } +// } +// } + action(f_forwardRequest, "f", desc="Forward requests") { if (getNumberOfLastLevelCaches() > 1) { peek(requestQueue_in, RequestMsg) { @@ -200,7 +330,7 @@ machine(Directory, "AMD Hammer-like protocol") out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; - out_msg.Destination.broadcast(); // Send to everyone, but... + out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor out_msg.MessageSize := MessageSizeType:Forwarded_Control; } @@ -216,6 +346,10 @@ machine(Directory, "AMD Hammer-like protocol") unblockNetwork_in.dequeue(); } + action(l_popMemQueue, "q", desc="Pop off-chip request queue") { + memQueue_in.dequeue(); + } + action(l_writeDataToMemory, "l", desc="Write PUTX/PUTO data to memory") { peek(unblockNetwork_in, ResponseMsg) { assert(in_msg.Dirty); @@ -226,6 +360,16 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(l_queueMemoryWBRequest, "lq", desc="Write PUTX data to memory") { + peek(unblockNetwork_in, ResponseMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + DEBUG_EXPR(out_msg); + } + } + } + action(ll_checkIncomingWriteback, "\l", desc="Check PUTX/PUTO response message") { peek(unblockNetwork_in, ResponseMsg) { assert(in_msg.Dirty == false); @@ -249,27 +393,35 @@ machine(Directory, "AMD Hammer-like protocol") // TRANSITIONS - transition(E, GETX, NO_B) { - dd_sendExclusiveData; + transition(E, GETX, NO_B_W) { + v_allocateTBE; + rx_recordExclusiveInTBE; + qf_queueMemoryFetchRequest; f_forwardRequest; i_popIncomingRequestQueue; } - transition(E, GETS, NO_B) { - dd_sendExclusiveData; + transition(E, GETS, NO_B_W) { + v_allocateTBE; + rx_recordExclusiveInTBE; + qf_queueMemoryFetchRequest; f_forwardRequest; i_popIncomingRequestQueue; } // - transition(O, GETX, NO_B) { - d_sendData; + transition(O, GETX, NO_B_W) { + v_allocateTBE; + r_recordDataInTBE; + qf_queueMemoryFetchRequest; f_forwardRequest; i_popIncomingRequestQueue; } - transition(O, GETS, O_B) { - d_sendData; + transition(O, GETS, O_B_W) { + v_allocateTBE; + r_recordDataInTBE; + qf_queueMemoryFetchRequest; f_forwardRequest; i_popIncomingRequestQueue; } @@ -296,7 +448,7 @@ machine(Directory, "AMD Hammer-like protocol") } // Blocked states - transition({NO_B, O_B, WB}, {GETS, GETX, PUT}) { + transition({NO_B, O_B, NO_B_W, O_B_W, NO_W, O_W, WB, WB_E_W, WB_O_W}, {GETS, GETX, PUT}) { zz_recycleRequest; } @@ -308,17 +460,57 @@ machine(Directory, "AMD Hammer-like protocol") j_popIncomingUnblockQueue; } + transition(NO_B_W, Memory_Data, NO_B) { + d_sendData; + w_deallocateTBE; + l_popMemQueue; + } + + transition(O_B_W, Memory_Data, O_B) { + d_sendData; + w_deallocateTBE; + l_popMemQueue; + } + + transition(NO_B_W, Unblock, NO_W) { + j_popIncomingUnblockQueue; + } + + transition(O_B_W, Unblock, O_W) { + j_popIncomingUnblockQueue; + } + + transition(NO_W, Memory_Data, NO) { + w_deallocateTBE; + l_popMemQueue; + } + + transition(O_W, Memory_Data, O) { + w_deallocateTBE; + l_popMemQueue; + } + // WB - transition(WB, Writeback_Dirty, O) { + transition(WB, Writeback_Dirty, WB_E_W) { l_writeDataToMemory; + l_queueMemoryWBRequest; j_popIncomingUnblockQueue; } - transition(WB, Writeback_Exclusive_Dirty, E) { + transition(WB, Writeback_Exclusive_Dirty, WB_O_W) { l_writeDataToMemory; + l_queueMemoryWBRequest; j_popIncomingUnblockQueue; } + transition(WB_E_W, Memory_Ack, E) { + l_popMemQueue; + } + + transition(WB_O_W, Memory_Ack, O) { + l_popMemQueue; + } + transition(WB, Writeback_Clean, O) { ll_checkIncomingWriteback; j_popIncomingUnblockQueue; diff --git a/src/mem/protocol/MOESI_hammer-msg.sm b/src/mem/protocol/MOESI_hammer-msg.sm index b4da617cc..c9f146819 100644 --- a/src/mem/protocol/MOESI_hammer-msg.sm +++ b/src/mem/protocol/MOESI_hammer-msg.sm @@ -50,6 +50,7 @@ enumeration(CoherenceResponseType, desc="...") { WB_EXCLUSIVE_CLEAN, desc="Clean writeback of exclusive data"; WB_EXCLUSIVE_DIRTY, desc="Dirty writeback of exclusive data"; UNBLOCK, desc="Unblock"; + NULL, desc="Null value"; } // TriggerType -- cgit v1.2.3 From cef3c5616358912b45aafac490bf182c1a8def04 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:32 -0800 Subject: ruby: MOESI hammer support for DMA reads and writes --- src/mem/protocol/MOESI_hammer-dir.sm | 517 ++++++++++++++++++++++++++++++----- src/mem/protocol/MOESI_hammer-dma.sm | 165 +++++++++++ src/mem/protocol/MOESI_hammer-msg.sm | 32 +++ src/mem/protocol/MOESI_hammer.slicc | 1 + 4 files changed, 653 insertions(+), 62 deletions(-) create mode 100644 src/mem/protocol/MOESI_hammer-dma.sm (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm index 49efaffb6..b9b001e40 100644 --- a/src/mem/protocol/MOESI_hammer-dir.sm +++ b/src/mem/protocol/MOESI_hammer-dir.sm @@ -39,11 +39,17 @@ machine(Directory, "AMD Hammer-like protocol") MessageBuffer forwardFromDir, network="To", virtual_network="2", ordered="false"; MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; - //MessageBuffer dmaRequestFromDir, network="To", virtual_network="4", ordered="true"; + // + // For a finite buffered network, note that the DMA response network only + // works at this relatively higher numbered (lower priority) virtual network + // because the trigger queue decouples cache responses from DMA responses. + // + MessageBuffer dmaResponseFromDir, network="To", virtual_network="4", ordered="true"; - MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; MessageBuffer unblockToDir, network="From", virtual_network="0", ordered="false"; - //MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; + MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false"; + MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; + MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; // STATES enumeration(State, desc="Directory states", default="Directory_State_E") { @@ -57,6 +63,13 @@ machine(Directory, "AMD Hammer-like protocol") O_B_W, desc="Owner, Blocked, waiting for Dram"; NO_W, desc="Not Owner, waiting for Dram"; O_W, desc="Owner, waiting for Dram"; + NO_DW_B_W, desc="Not Owner, Dma Write waiting for Dram and cache responses"; + NO_DR_B_W, desc="Not Owner, Dma Read waiting for Dram and cache responses"; + NO_DR_B_D, desc="Not Owner, Dma Read waiting for cache responses including dirty data"; + NO_DR_B, desc="Not Owner, Dma Read waiting for cache responses"; + NO_DW_W, desc="Not Owner, Dma Write waiting for Dram"; + O_DR_B_W, desc="Owner, Dma Read waiting for Dram and cache responses"; + O_DR_B, desc="Owner, Dma Read waiting for cache responses"; WB, desc="Blocked on a writeback"; WB_O_W, desc="Blocked on memory write, will go to O"; WB_E_W, desc="Blocked on memory write, will go to E"; @@ -73,9 +86,23 @@ machine(Directory, "AMD Hammer-like protocol") Writeback_Exclusive_Clean, desc="The final part of a PutX (no data, exclusive)"; Writeback_Exclusive_Dirty, desc="The final part of a PutX (data, exclusive)"; + // DMA requests + DMA_READ, desc="A DMA Read memory request"; + DMA_WRITE, desc="A DMA Write memory request"; + // Memory Controller Memory_Data, desc="Fetched data from memory arrives"; Memory_Ack, desc="Writeback Ack from memory arrives"; + + // Cache responses required to handle DMA + Ack, desc="Received an ack message"; + Shared_Ack, desc="Received an ack message, responder has a shared copy"; + Shared_Data, desc="Received a data message, responder has a shared copy"; + Exclusive_Data, desc="Received a data message, responder had an exclusive copy, they gave it to us"; + + // Triggers + All_acks_and_data, desc="Received all required data and message acks"; + All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy"; } // TYPES @@ -100,9 +127,13 @@ machine(Directory, "AMD Hammer-like protocol") Address PhysicalAddress, desc="physical address"; State TBEState, desc="Transient State"; CoherenceResponseType ResponseType, desc="The type for the subsequent response message"; - DataBlock DataBlk, desc="Data to be written (DMA write only)"; + DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory"; + DataBlock DataBlk, desc="The current view of system memory"; int Len, desc="..."; MachineID DmaRequestor, desc="DMA requestor"; + int NumPendingMsgs, desc="Number of pending acks/messages"; + bool CacheDirty, desc="Indicates whether a cache has responded with dirty data"; + bool Sharers, desc="Indicates whether a cache has indicated it is currently a sharer"; } external_type(TBETable) { @@ -135,10 +166,14 @@ machine(Directory, "AMD Hammer-like protocol") directory[addr].DirectoryState := state; } + MessageBuffer triggerQueue, ordered="true"; + // ** OUT_PORTS ** + out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests out_port(forwardNetwork_out, RequestMsg, forwardFromDir); out_port(responseNetwork_out, ResponseMsg, responseFromDir); - out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests + out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaResponseFromDir); + out_port(triggerQueue_out, TriggerMsg, triggerQueue); // // Memory buffer for memory controller to DIMM communication @@ -147,6 +182,21 @@ machine(Directory, "AMD Hammer-like protocol") // ** IN_PORTS ** + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, triggerQueue) { + if (triggerQueue_in.isReady()) { + peek(triggerQueue_in, TriggerMsg) { + if (in_msg.Type == TriggerType:ALL_ACKS) { + trigger(Event:All_acks_and_data, in_msg.Address); + } else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) { + trigger(Event:All_acks_and_data_no_sharers, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + in_port(unblockNetwork_in, ResponseMsg, unblockToDir) { if (unblockNetwork_in.isReady()) { peek(unblockNetwork_in, ResponseMsg) { @@ -167,6 +217,39 @@ machine(Directory, "AMD Hammer-like protocol") } } + // Response Network + in_port(responseToDir_in, ResponseMsg, responseToDir) { + if (responseToDir_in.isReady()) { + peek(responseToDir_in, ResponseMsg) { + if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_SHARED) { + trigger(Event:Shared_Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA_SHARED) { + trigger(Event:Shared_Data, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) { + trigger(Event:Exclusive_Data, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + + in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, DMARequestMsg) { + if (in_msg.Type == DMARequestType:READ) { + trigger(Event:DMA_READ, in_msg.LineAddress); + } else if (in_msg.Type == DMARequestType:WRITE) { + trigger(Event:DMA_WRITE, in_msg.LineAddress); + } else { + error("Invalid message"); + } + } + } + } + in_port(requestQueue_in, RequestMsg, requestToDir) { if (requestQueue_in.isReady()) { peek(requestQueue_in, RequestMsg) { @@ -233,10 +316,61 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(vd_allocateDmaRequestInTBE, "vd", desc="Record Data in TBE") { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + TBEs[address].DmaDataBlk := in_msg.DataBlk; + TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; + TBEs[address].Len := in_msg.Len; + TBEs[address].DmaRequestor := in_msg.Requestor; + TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; + // + // One ack for each last-level cache + // + TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); + // + // Assume initially that the caches store a clean copy and that memory + // will provide the data + // + TBEs[address].CacheDirty := false; + } + } + action(w_deallocateTBE, "w", desc="Deallocate TBE") { TBEs.deallocate(address); } + action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") { + peek(responseToDir_in, ResponseMsg) { + assert(in_msg.Acks > 0); + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + // + // Note that cache data responses will have an ack count of 2. However, + // directory DMA requests must wait for acks from all LLC caches, so + // only decrement by 1. + // + TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - 1; + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + } + } + + action(n_popResponseQueue, "n", desc="Pop response queue") { + responseToDir_in.dequeue(); + } + + action(o_checkForCompletion, "o", desc="Check if we have received all the messages required for completion") { + if (TBEs[address].NumPendingMsgs == 0) { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.Address := address; + if (TBEs[address].Sharers) { + out_msg.Type := TriggerType:ALL_ACKS; + } else { + out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS; + } + } + } + } + action(d_sendData, "d", desc="Send data to requestor") { peek(memQueue_in, MemoryMsg) { enqueue(responseNetwork_out, ResponseMsg, latency="1") { @@ -252,18 +386,66 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(dr_sendDmaData, "dr", desc="Send Data to DMA controller from memory") { + peek(memQueue_in, MemoryMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(dt_sendDmaDataFromTbe, "dt", desc="Send Data to DMA controller from tbe") { + peek(triggerQueue_in, TriggerMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(da_sendDmaAck, "da", desc="Send Ack to DMA controller") { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:ACK; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + action(rx_recordExclusiveInTBE, "rx", desc="Record Exclusive in TBE") { peek(requestQueue_in, RequestMsg) { TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; } } - action(r_recordDataInTBE, "r", desc="Record Data in TBE") { + action(r_recordDataInTBE, "rt", desc="Record Data in TBE") { peek(requestQueue_in, RequestMsg) { TBEs[address].ResponseType := CoherenceResponseType:DATA; } } + action(r_setSharerBit, "r", desc="We saw other sharers") { + TBEs[address].Sharers := true; + } + action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { peek(requestQueue_in, RequestMsg) { enqueue(memQueue_out, MemoryMsg, latency="1") { @@ -272,56 +454,25 @@ machine(Directory, "AMD Hammer-like protocol") out_msg.Sender := machineID; out_msg.OriginalRequestorMachId := in_msg.Requestor; out_msg.MessageSize := in_msg.MessageSize; - out_msg.DataBlk := directory[in_msg.Address].DataBlk; + out_msg.DataBlk := directory[address].DataBlk; DEBUG_EXPR(out_msg); } } } -// action(qx_queueMemoryFetchExclusiveRequest, "xf", desc="Queue off-chip fetch request") { -// peek(requestQueue_in, RequestMsg) { -// enqueue(memQueue_out, MemoryMsg, latency=memory_request_latency) { -// out_msg.Address := address; -// out_msg.Type := MemoryRequestType:MEMORY_READ; -// out_msg.ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; -// out_msg.Sender := machineID; -// out_msg.OriginalRequestorMachId := in_msg.Requestor; -// out_msg.MessageSize := in_msg.MessageSize; -// out_msg.DataBlk := directory[in_msg.Address].DataBlk; -// DEBUG_EXPR(out_msg); -// } -// } -// } - -// action(d_sendData, "d", desc="Send data to requestor") { -// peek(requestQueue_in, RequestMsg) { -// enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { -// out_msg.Address := address; -// out_msg.Type := CoherenceResponseType:DATA; -// out_msg.Sender := machineID; -// out_msg.Destination.add(in_msg.Requestor); -// out_msg.DataBlk := directory[in_msg.Address].DataBlk; -// out_msg.Dirty := false; // By definition, the block is now clean -// out_msg.Acks := 1; -// out_msg.MessageSize := MessageSizeType:Response_Data; -// } -// } -// } - -// action(dd_sendExclusiveData, "\d", desc="Send exclusive data to requestor") { -// peek(requestQueue_in, RequestMsg) { -// enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { -// out_msg.Address := address; -// out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; -// out_msg.Sender := machineID; -// out_msg.Destination.add(in_msg.Requestor); -// out_msg.DataBlk := directory[in_msg.Address].DataBlk; -// out_msg.Dirty := false; // By definition, the block is now clean -// out_msg.Acks := 1; -// out_msg.MessageSize := MessageSizeType:Response_Data; -// } -// } -// } + action(qd_queueMemoryRequestFromDmaRead, "qd", desc="Queue off-chip fetch request") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } action(f_forwardRequest, "f", desc="Forward requests") { if (getNumberOfLastLevelCaches() > 1) { @@ -338,6 +489,38 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(f_forwardWriteFromDma, "fw", desc="Forward requests") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETX; + // + // Send to all L1 caches, since the requestor is the memory controller + // itself + // + out_msg.Requestor := machineID; + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + } + } + } + + action(f_forwardReadFromDma, "fr", desc="Forward requests") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETS; + // + // Send to all L1 caches, since the requestor is the memory controller + // itself + // + out_msg.Requestor := machineID; + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + } + } + } + action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") { requestQueue_in.dequeue(); } @@ -350,16 +533,52 @@ machine(Directory, "AMD Hammer-like protocol") memQueue_in.dequeue(); } + action(g_popTriggerQueue, "g", desc="Pop trigger queue") { + triggerQueue_in.dequeue(); + } + + action(p_popDmaRequestQueue, "pd", desc="pop dma request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(y_recycleDmaRequestQueue, "y", desc="recycle dma request queue") { + dmaRequestQueue_in.recycle(); + } + + action(r_recordMemoryData, "rd", desc="record data from memory to TBE") { + peek(memQueue_in, MemoryMsg) { + if (TBEs[address].CacheDirty == false) { + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + } + + action(r_recordCacheData, "rc", desc="record data from cache response to TBE") { + peek(responseToDir_in, ResponseMsg) { + TBEs[address].CacheDirty := true; + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + action(l_writeDataToMemory, "l", desc="Write PUTX/PUTO data to memory") { peek(unblockNetwork_in, ResponseMsg) { assert(in_msg.Dirty); assert(in_msg.MessageSize == MessageSizeType:Writeback_Data); - directory[in_msg.Address].DataBlk := in_msg.DataBlk; + directory[address].DataBlk := in_msg.DataBlk; DEBUG_EXPR(in_msg.Address); DEBUG_EXPR(in_msg.DataBlk); } } + action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") { + directory[address].DataBlk := TBEs[address].DataBlk; + directory[address].DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + } + + action(a_assertCacheData, "ac", desc="Assert that a cache provided the data") { + assert(TBEs[address].CacheDirty); + } + action(l_queueMemoryWBRequest, "lq", desc="Write PUTX data to memory") { peek(unblockNetwork_in, ResponseMsg) { enqueue(memQueue_out, MemoryMsg, latency="1") { @@ -370,6 +589,18 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(ld_queueMemoryDmaWrite, "ld", desc="Write DMA data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + // first, initialize the data blk to the current version of system memory + out_msg.DataBlk := TBEs[address].DataBlk; + // then add the dma write data + out_msg.DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + DEBUG_EXPR(out_msg); + } + } + action(ll_checkIncomingWriteback, "\l", desc="Check PUTX/PUTO response message") { peek(unblockNetwork_in, ResponseMsg) { assert(in_msg.Dirty == false); @@ -379,20 +610,17 @@ machine(Directory, "AMD Hammer-like protocol") // implementation. We include the data in the "dataless" // message so we can assert the clean data matches the datablock // in memory - assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk); + assert(directory[address].DataBlk == in_msg.DataBlk); } } - // action(z_stall, "z", desc="Cannot be handled right now.") { - // Special name recognized as do nothing case - // } - action(zz_recycleRequest, "\z", desc="Recycle the request queue") { requestQueue_in.recycle(); } // TRANSITIONS + // Transitions out of E state transition(E, GETX, NO_B_W) { v_allocateTBE; rx_recordExclusiveInTBE; @@ -409,7 +637,14 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } - // + transition(E, DMA_READ, NO_DR_B_W) { + vd_allocateDmaRequestInTBE; + qd_queueMemoryRequestFromDmaRead; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + // Transitions out of O state transition(O, GETX, NO_B_W) { v_allocateTBE; r_recordDataInTBE; @@ -426,7 +661,20 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } - // + transition(O, DMA_READ, O_DR_B_W) { + vd_allocateDmaRequestInTBE; + qd_queueMemoryRequestFromDmaRead; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + transition({E, O, NO}, DMA_WRITE, NO_DW_B_W) { + vd_allocateDmaRequestInTBE; + f_forwardWriteFromDma; + p_popDmaRequestQueue; + } + + // Transitions out of NO state transition(NO, GETX, NO_B) { f_forwardRequest; i_popIncomingRequestQueue; @@ -442,16 +690,33 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } + transition(NO, DMA_READ, NO_DR_B_D) { + vd_allocateDmaRequestInTBE; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + // Nack PUT requests when races cause us to believe we own the data transition({O, E}, PUT) { b_sendWriteBackNack; i_popIncomingRequestQueue; } - // Blocked states - transition({NO_B, O_B, NO_B_W, O_B_W, NO_W, O_W, WB, WB_E_W, WB_O_W}, {GETS, GETX, PUT}) { + // Blocked transient states + transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, + NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, + NO_W, O_W, WB, WB_E_W, WB_O_W}, + {GETS, GETX, PUT}) { zz_recycleRequest; } + transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, + NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, + NO_W, O_W, WB, WB_E_W, WB_O_W}, + {DMA_READ, DMA_WRITE}) { + y_recycleDmaRequestQueue; + } + transition(NO_B, Unblock, NO) { j_popIncomingUnblockQueue; } @@ -466,6 +731,134 @@ machine(Directory, "AMD Hammer-like protocol") l_popMemQueue; } + transition(NO_DR_B_W, Memory_Data, NO_DR_B) { + r_recordMemoryData; + o_checkForCompletion; + l_popMemQueue; + } + + transition(O_DR_B_W, Memory_Data, O_DR_B) { + r_recordMemoryData; + dr_sendDmaData; + o_checkForCompletion; + l_popMemQueue; + } + + transition({NO_DR_B, O_DR_B, NO_DR_B_D, NO_DW_B_W}, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Ack) { + m_decrementNumberOfMessages; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D}, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Shared_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D}, Shared_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Exclusive_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D, NO_DW_B_W}, Exclusive_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B, All_acks_and_data, O) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B_D, All_acks_and_data, O) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(O_DR_B, All_acks_and_data_no_sharers, O) { + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B, All_acks_and_data_no_sharers, E) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B_D, All_acks_and_data_no_sharers, E) { + a_assertCacheData; + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DW_B_W, All_acks_and_data_no_sharers, NO_DW_W) { + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWrite; + g_popTriggerQueue; + } + + transition(NO_DW_W, Memory_Ack, E) { + da_sendDmaAck; + w_deallocateTBE; + l_popMemQueue; + } + transition(O_B_W, Memory_Data, O_B) { d_sendData; w_deallocateTBE; @@ -490,7 +883,7 @@ machine(Directory, "AMD Hammer-like protocol") l_popMemQueue; } - // WB + // WB State Transistions transition(WB, Writeback_Dirty, WB_E_W) { l_writeDataToMemory; l_queueMemoryWBRequest; diff --git a/src/mem/protocol/MOESI_hammer-dma.sm b/src/mem/protocol/MOESI_hammer-dma.sm new file mode 100644 index 000000000..b217923a4 --- /dev/null +++ b/src/mem/protocol/MOESI_hammer-dma.sm @@ -0,0 +1,165 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +machine(DMA, "DMA Controller") +: int request_latency +{ + + MessageBuffer responseFromDir, network="From", virtual_network="4", ordered="true", no_vector="true"; + MessageBuffer reqToDirectory, network="To", virtual_network="5", ordered="false", no_vector="true"; + + enumeration(State, desc="DMA states", default="DMA_State_READY") { + READY, desc="Ready to accept a new request"; + BUSY_RD, desc="Busy: currently processing a request"; + BUSY_WR, desc="Busy: currently processing a request"; + } + + enumeration(Event, desc="DMA events") { + ReadRequest, desc="A new read request"; + WriteRequest, desc="A new write request"; + Data, desc="Data from a DMA memory read"; + Ack, desc="DMA write to memory completed"; + } + + external_type(DMASequencer) { + void ackCallback(); + void dataCallback(DataBlock); + } + + MessageBuffer mandatoryQueue, ordered="false", no_vector="true"; + DMASequencer dma_sequencer, factory='RubySystem::getDMASequencer(m_cfg["dma_sequencer"])', no_vector="true"; + State cur_state, no_vector="true"; + + State getState(Address addr) { + return cur_state; + } + void setState(Address addr, State state) { + cur_state := state; + } + + out_port(reqToDirectory_out, DMARequestMsg, reqToDirectory, desc="..."); + + in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, SequencerMsg) { + if (in_msg.Type == SequencerRequestType:LD ) { + trigger(Event:ReadRequest, in_msg.LineAddress); + } else if (in_msg.Type == SequencerRequestType:ST) { + trigger(Event:WriteRequest, in_msg.LineAddress); + } else { + error("Invalid request type"); + } + } + } + } + + in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") { + if (dmaResponseQueue_in.isReady()) { + peek( dmaResponseQueue_in, DMAResponseMsg) { + if (in_msg.Type == DMAResponseType:ACK) { + trigger(Event:Ack, in_msg.LineAddress); + } else if (in_msg.Type == DMAResponseType:DATA) { + trigger(Event:Data, in_msg.LineAddress); + } else { + error("Invalid response type"); + } + } + } + } + + action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:READ; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:WRITE; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.ackCallback(); + } + } + + action(d_dataCallback, "d", desc="Write data to dma sequencer") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.dataCallback(in_msg.DataBlk); + } + } + + action(p_popRequestQueue, "p", desc="Pop request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(p_popResponseQueue, "\p", desc="Pop request queue") { + dmaResponseQueue_in.dequeue(); + } + + transition(READY, ReadRequest, BUSY_RD) { + s_sendReadRequest; + p_popRequestQueue; + } + + transition(READY, WriteRequest, BUSY_WR) { + s_sendWriteRequest; + p_popRequestQueue; + } + + transition(BUSY_RD, Data, READY) { + d_dataCallback; + p_popResponseQueue; + } + + transition(BUSY_WR, Ack, READY) { + a_ackCallback; + p_popResponseQueue; + } +} diff --git a/src/mem/protocol/MOESI_hammer-msg.sm b/src/mem/protocol/MOESI_hammer-msg.sm index c9f146819..5d8226eb6 100644 --- a/src/mem/protocol/MOESI_hammer-msg.sm +++ b/src/mem/protocol/MOESI_hammer-msg.sm @@ -85,3 +85,35 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") { int Acks, desc="How many messages this counts as"; MessageSizeType MessageSize, desc="size category of the message"; } + +enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") { + READ, desc="Memory Read"; + WRITE, desc="Memory Write"; + NULL, desc="Invalid"; +} + +enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") { + DATA, desc="DATA read"; + ACK, desc="ACK write"; + NULL, desc="Invalid"; +} + +structure(DMARequestMsg, desc="...", interface="NetworkMessage") { + DMARequestType Type, desc="Request type (read/write)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + MachineID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + int Len, desc="The length of the request"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +structure(DMAResponseMsg, desc="...", interface="NetworkMessage") { + DMAResponseType Type, desc="Response type (DATA/ACK)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + MessageSizeType MessageSize, desc="size category of the message"; +} diff --git a/src/mem/protocol/MOESI_hammer.slicc b/src/mem/protocol/MOESI_hammer.slicc index d49350e32..31ad47c2e 100644 --- a/src/mem/protocol/MOESI_hammer.slicc +++ b/src/mem/protocol/MOESI_hammer.slicc @@ -1,4 +1,5 @@ MOESI_hammer-msg.sm MOESI_hammer-cache.sm MOESI_hammer-dir.sm +MOESI_hammer-dma.sm standard_1level_CMP-protocol.sm -- cgit v1.2.3 From 5d8a669539a142ece820cf0c82722ea1c755d7cd Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:33 -0800 Subject: Resurrection of the CMP token protocol to GEM5 --- src/mem/protocol/MOESI_CMP_token-L1cache.sm | 353 ++++++++---- src/mem/protocol/MOESI_CMP_token-L2cache.sm | 174 +++--- src/mem/protocol/MOESI_CMP_token-dir.sm | 866 +++++++++++++++++++++++++--- src/mem/protocol/MOESI_CMP_token-dma.sm | 165 ++++++ src/mem/protocol/MOESI_CMP_token-msg.sm | 54 +- src/mem/protocol/MOESI_CMP_token.slicc | 1 + src/mem/protocol/RubySlicc_Util.sm | 1 - 7 files changed, 1335 insertions(+), 279 deletions(-) create mode 100644 src/mem/protocol/MOESI_CMP_token-dma.sm (limited to 'src/mem/protocol') diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm index ab58c5c00..3fb4a8862 100644 --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -32,21 +32,32 @@ * */ -machine(L1Cache, "Token protocol") { +machine(L1Cache, "Token protocol") + : int l1_request_latency, + int l1_response_latency, + int l2_select_low_bit, + int l2_select_num_bits, + int N_tokens, + int retry_threshold, + int fixed_timeout_latency, + bool dynamic_timeout_enabled +{ // From this node's L1 cache TO the network - // a local L1 -> this L2 bank, currently ordered with directory forwarded requests - MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false"; + // a local L1 -> this L2 bank - MessageBuffer responseFromL1Cache, network="To", virtual_network="2", ordered="false"; - MessageBuffer persistentFromL1Cache, network="To", virtual_network="3", ordered="true"; + MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false"; + MessageBuffer persistentFromL1Cache, network="To", virtual_network="2", ordered="true"; + // a local L1 -> this L2 bank, currently ordered with directory forwarded requests + MessageBuffer requestFromL1Cache, network="To", virtual_network="4", ordered="false"; + // To this node's L1 cache FROM the network // a L2 bank -> this L1 - MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false"; + MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false"; + MessageBuffer persistentToL1Cache, network="From", virtual_network="2", ordered="true"; // a L2 bank -> this L1 - MessageBuffer responseToL1Cache, network="From", virtual_network="2", ordered="false"; - MessageBuffer persistentToL1Cache, network="From", virtual_network="3", ordered="true"; + MessageBuffer requestToL1Cache, network="From", virtual_network="4", ordered="false"; // STATES enumeration(State, desc="Cache states", default="L1Cache_State_I") { @@ -111,10 +122,6 @@ machine(L1Cache, "Token protocol") { // TYPES - int getRetryThreshold(); - int getFixedTimeoutLatency(); - bool getDynamicTimeoutEnabled(); - // CacheEntry structure(Entry, desc="...", interface="AbstractCacheEntry") { State CacheState, desc="cache state"; @@ -143,7 +150,7 @@ machine(L1Cache, "Token protocol") { external_type(CacheMemory) { bool cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); @@ -157,17 +164,28 @@ machine(L1Cache, "Token protocol") { bool isPresent(Address); } + external_type(PersistentTable) { + void persistentRequestLock(Address, MachineID, AccessType); + void persistentRequestUnlock(Address, MachineID); + bool okToIssueStarving(Address, MachineID); + MachineID findSmallest(Address); + AccessType typeOfSmallest(Address); + void markEntries(Address); + bool isLocked(Address); + int countStarvingForAddress(Address); + int countReadStarvingForAddress(Address); + } TBETable L1_TBEs, template_hack=""; - CacheMemory L1IcacheMemory, template_hack="", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1I"', abstract_chip_ptr="true"; - CacheMemory L1DcacheMemory, template_hack="", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1D"', abstract_chip_ptr="true"; + CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])'; + CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])'; MessageBuffer mandatoryQueue, ordered="false", abstract_chip_ptr="true"; - Sequencer sequencer, abstract_chip_ptr="true", constructor_hack="i"; + Sequencer sequencer, factory='RubySystem::getSequencer(m_cfg["sequencer"])'; bool starving, default="false"; - PersistentTable persistentTable, constructor_hack="i"; + PersistentTable persistentTable; TimerTable useTimerTable; TimerTable reissueTimerTable; @@ -175,11 +193,11 @@ machine(L1Cache, "Token protocol") { int outstandingPersistentRequests, default="0"; int averageLatencyHysteresis, default="(8)"; // Constant that provides hysteresis for calculated the estimated average - int averageLatencyCounter, default="(500 << (*m_L1Cache_averageLatencyHysteresis_vec[i]))"; + int averageLatencyCounter, default="(500 << (*m_L1Cache_averageLatencyHysteresis_ptr))"; int averageLatencyEstimate() { DEBUG_EXPR( (averageLatencyCounter >> averageLatencyHysteresis) ); - profile_average_latency_estimate( (averageLatencyCounter >> averageLatencyHysteresis) ); + //profile_average_latency_estimate( (averageLatencyCounter >> averageLatencyHysteresis) ); return averageLatencyCounter >> averageLatencyHysteresis; } @@ -366,30 +384,33 @@ machine(L1Cache, "Token protocol") { } } - GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) { - if (machineIDToMachineType(sender) == MachineType:L1Cache) { - return GenericMachineType:L1Cache_wCC; // NOTE direct L1 hits should not call this - } else if (machineIDToMachineType(sender) == MachineType:L2Cache) { - if ( sender == (map_L1CacheMachId_to_L2Cache(addr,machineID))) { - return GenericMachineType:L2Cache; - } else { - return GenericMachineType:L2Cache_wCC; - } - } else { - return ConvertMachToGenericMach(machineIDToMachineType(sender)); - } - } - - bool okToIssueStarving(Address addr) { - return persistentTable.okToIssueStarving(addr); +// GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) { +// if (machineIDToMachineType(sender) == MachineType:L1Cache) { +// return GenericMachineType:L1Cache_wCC; // NOTE direct L1 hits should not call this +// } else if (machineIDToMachineType(sender) == MachineType:L2Cache) { +// +// if (sender == (mapAddressToRange(addr, +// MachineType:L2Cache, +// l2_select_low_bit, +// l2_select_num_bits))) { +// +// return GenericMachineType:L2Cache; +// } else { +// return GenericMachineType:L2Cache_wCC; +// } +// } else { +// return ConvertMachToGenericMach(machineIDToMachineType(sender)); +// } +// } + + bool okToIssueStarving(Address addr, MachineID machinID) { + return persistentTable.okToIssueStarving(addr, machineID); } void markPersistentEntries(Address addr) { persistentTable.markEntries(addr); } - MessageBuffer triggerQueue, ordered="false", random="false"; - // ** OUT_PORTS ** out_port(persistentNetwork_out, PersistentMsg, persistentFromL1Cache); out_port(requestNetwork_out, RequestMsg, requestFromL1Cache); @@ -507,7 +528,11 @@ machine(L1Cache, "Token protocol") { // Mark TBE flag if response received off-chip. Use this to update average latency estimate if ( in_msg.SenderMachine == MachineType:L2Cache ) { - if (in_msg.Sender == map_L1CacheMachId_to_L2Cache(in_msg.Address, machineID)) { + if (in_msg.Sender == mapAddressToRange(in_msg.Address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)) { + // came from an off-chip L2 cache if (L1_TBEs.isPresent(in_msg.Address)) { // L1_TBEs[in_msg.Address].ExternalResponse := true; @@ -523,15 +548,15 @@ machine(L1Cache, "Token protocol") { // profile_memory_response( in_msg.Address); } } else if ( in_msg.SenderMachine == MachineType:L1Cache) { - if (isLocalProcessor(machineID, in_msg.Sender) == false) { - if (L1_TBEs.isPresent(in_msg.Address)) { + //if (isLocalProcessor(machineID, in_msg.Sender) == false) { + //if (L1_TBEs.isPresent(in_msg.Address)) { // L1_TBEs[in_msg.Address].ExternalResponse := true; // profile_offchipL1_response(in_msg.Address ); - } - } - else { + //} + //} + //else { // profile_onchipL1_response(in_msg.Address ); - } + //} } else { error("unexpected SenderMachine"); } @@ -570,42 +595,42 @@ machine(L1Cache, "Token protocol") { // ** INSTRUCTION ACCESS *** // Check to see if it is in the OTHER L1 - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.Address); + trigger(Event:L1_Replacement, in_msg.LineAddress); } - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1IcacheMemory.cacheAvail(in_msg.Address)) { + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { // No room in the L1, so we need to make room - trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.LineAddress)); } } } else { // *** DATA ACCESS *** // Check to see if it is in the OTHER L1 - if (L1IcacheMemory.isTagPresent(in_msg.Address)) { + if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) { // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.Address); + trigger(Event:L1_Replacement, in_msg.LineAddress); } - if (L1DcacheMemory.isTagPresent(in_msg.Address)) { + if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { - if (L1DcacheMemory.cacheAvail(in_msg.Address)) { + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 - trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address); + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress); } else { // No room in the L1, so we need to make room - trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.Address)); + trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.LineAddress)); } } } @@ -618,19 +643,31 @@ machine(L1Cache, "Token protocol") { action(a_issueReadRequest, "a", desc="Issue GETS") { if (L1_TBEs[address].IssueCount == 0) { // Update outstanding requests - profile_outstanding_request(outstandingRequests); + //profile_outstanding_request(outstandingRequests); outstandingRequests := outstandingRequests + 1; } - if (L1_TBEs[address].IssueCount >= getRetryThreshold() ) { + if (L1_TBEs[address].IssueCount >= retry_threshold) { // Issue a persistent request if possible - if (okToIssueStarving(address) && (starving == false)) { - enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") { + if (okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := PersistentRequestType:GETS_PERSISTENT; out_msg.Requestor := machineID; out_msg.Destination.broadcast(MachineType:L1Cache); - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Persistent_Control; out_msg.Prefetch := L1_TBEs[address].Prefetch; @@ -640,11 +677,11 @@ machine(L1Cache, "Token protocol") { starving := true; if (L1_TBEs[address].IssueCount == 0) { - profile_persistent_prediction(address, L1_TBEs[address].AccessType); + //profile_persistent_prediction(address, L1_TBEs[address].AccessType); } // Update outstanding requests - profile_outstanding_persistent_request(outstandingPersistentRequests); + //profile_outstanding_persistent_request(outstandingPersistentRequests); outstandingPersistentRequests := outstandingPersistentRequests + 1; // Increment IssueCount @@ -666,11 +703,16 @@ machine(L1Cache, "Token protocol") { } } else { // Make a normal request - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.RetryNum := L1_TBEs[address].IssueCount; if (L1_TBEs[address].IssueCount == 0) { out_msg.MessageSize := MessageSizeType:Request_Control; @@ -682,11 +724,18 @@ machine(L1Cache, "Token protocol") { } // send to other local L1s, with local bit set - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; - out_msg.Destination := getOtherLocalL1IDs(machineID); + + // + // Since only one chip, assuming all L1 caches are local + // + //out_msg.Destination := getOtherLocalL1IDs(machineID); + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.remove(machineID); + out_msg.RetryNum := L1_TBEs[address].IssueCount; out_msg.isLocal := true; if (L1_TBEs[address].IssueCount == 0) { @@ -703,10 +752,10 @@ machine(L1Cache, "Token protocol") { // Set a wakeup timer - if (getDynamicTimeoutEnabled()) { + if (dynamic_timeout_enabled) { reissueTimerTable.set(address, 1.25 * averageLatencyEstimate()); } else { - reissueTimerTable.set(address, getFixedTimeoutLatency()); + reissueTimerTable.set(address, fixed_timeout_latency); } } @@ -716,20 +765,32 @@ machine(L1Cache, "Token protocol") { if (L1_TBEs[address].IssueCount == 0) { // Update outstanding requests - profile_outstanding_request(outstandingRequests); + //profile_outstanding_request(outstandingRequests); outstandingRequests := outstandingRequests + 1; } - if (L1_TBEs[address].IssueCount >= getRetryThreshold() ) { + if (L1_TBEs[address].IssueCount >= retry_threshold) { // Issue a persistent request if possible - if ( okToIssueStarving(address) && (starving == false)) { - enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") { + if ( okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := PersistentRequestType:GETX_PERSISTENT; out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.broadcast(MachineType:L1Cache); - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Persistent_Control; out_msg.Prefetch := L1_TBEs[address].Prefetch; @@ -739,11 +800,11 @@ machine(L1Cache, "Token protocol") { starving := true; // Update outstanding requests - profile_outstanding_persistent_request(outstandingPersistentRequests); + //profile_outstanding_persistent_request(outstandingPersistentRequests); outstandingPersistentRequests := outstandingPersistentRequests + 1; if (L1_TBEs[address].IssueCount == 0) { - profile_persistent_prediction(address, L1_TBEs[address].AccessType); + //profile_persistent_prediction(address, L1_TBEs[address].AccessType); } // Increment IssueCount @@ -766,12 +827,17 @@ machine(L1Cache, "Token protocol") { } else { // Make a normal request - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:L1Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.RetryNum := L1_TBEs[address].IssueCount; if (L1_TBEs[address].IssueCount == 0) { @@ -784,12 +850,19 @@ machine(L1Cache, "Token protocol") { } // send to other local L1s too - enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestNetwork_out, RequestMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; out_msg.isLocal := true; - out_msg.Destination := getOtherLocalL1IDs(machineID); + + // + // Since only one chip, assuming all L1 caches are local + // + //out_msg.Destination := getOtherLocalL1IDs(machineID); + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.remove(machineID); + out_msg.RetryNum := L1_TBEs[address].IssueCount; if (L1_TBEs[address].IssueCount == 0) { out_msg.MessageSize := MessageSizeType:Request_Control; @@ -807,10 +880,10 @@ machine(L1Cache, "Token protocol") { DEBUG_EXPR(L1_TBEs[address].IssueCount); // Set a wakeup timer - if (getDynamicTimeoutEnabled()) { + if (dynamic_timeout_enabled) { reissueTimerTable.set(address, 1.25 * averageLatencyEstimate()); } else { - reissueTimerTable.set(address, getFixedTimeoutLatency()); + reissueTimerTable.set(address, fixed_timeout_latency); } } } @@ -818,7 +891,7 @@ machine(L1Cache, "Token protocol") { action(bb_bounceResponse, "\b", desc="Bounce tokens and data to memory") { peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -833,11 +906,16 @@ machine(L1Cache, "Token protocol") { } action(c_ownedReplacement, "c", desc="Issue writeback") { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Tokens := getCacheEntry(address).Tokens; out_msg.DataBlk := getCacheEntry(address).DataBlk; out_msg.Dirty := getCacheEntry(address).Dirty; @@ -853,11 +931,16 @@ machine(L1Cache, "Token protocol") { // don't send writeback if replacing block with no tokens if (getCacheEntry(address).Tokens != 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Tokens := getCacheEntry(address).Tokens; out_msg.DataBlk := getCacheEntry(address).DataBlk; // assert(getCacheEntry(address).Dirty == false); @@ -879,7 +962,7 @@ machine(L1Cache, "Token protocol") { action(d_sendDataWithToken, "d", desc="Send data and a token from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -902,14 +985,14 @@ machine(L1Cache, "Token protocol") { action(d_sendDataWithNTokenIfAvail, "\dd", desc="Send data and a token from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - if (getCacheEntry(address).Tokens > N_tokens()) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + if (getCacheEntry(address).Tokens > N_tokens) { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(in_msg.Requestor); - out_msg.Tokens := N_tokens(); + out_msg.Tokens := N_tokens; out_msg.DataBlk := getCacheEntry(address).DataBlk; // out_msg.Dirty := getCacheEntry(address).Dirty; out_msg.Dirty := false; @@ -919,10 +1002,10 @@ machine(L1Cache, "Token protocol") { out_msg.MessageSize := MessageSizeType:Response_Data; } } - getCacheEntry(address).Tokens := getCacheEntry(address).Tokens - N_tokens(); + getCacheEntry(address).Tokens := getCacheEntry(address).Tokens - N_tokens; } else if (getCacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -946,7 +1029,7 @@ machine(L1Cache, "Token protocol") { action(dd_sendDataWithAllTokens, "\d", desc="Send data and all tokens from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -969,7 +1052,7 @@ machine(L1Cache, "Token protocol") { action(e_sendAckWithCollectedTokens, "e", desc="Send ack with the tokens we've collected thus far.") { // assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself if (getCacheEntry(address).Tokens > 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -986,7 +1069,7 @@ machine(L1Cache, "Token protocol") { action(ee_sendDataWithAllTokens, "\e", desc="Send data and all tokens from cache to starver") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getCacheEntry(address).Tokens > 0); - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -1005,23 +1088,23 @@ machine(L1Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getCacheEntry(address).Tokens > 0); if (getCacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(persistentTable.findSmallest(address)); assert(getCacheEntry(address).Tokens >= 1); - if (getCacheEntry(address).Tokens > N_tokens()) { - out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens; } else { out_msg.Tokens := getCacheEntry(address).Tokens - 1; } out_msg.MessageSize := MessageSizeType:Response_Control; } } - if (getCacheEntry(address).Tokens > N_tokens()) { - getCacheEntry(address).Tokens := N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + getCacheEntry(address).Tokens := N_tokens; } else { getCacheEntry(address).Tokens := 1; } @@ -1031,15 +1114,15 @@ machine(L1Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getCacheEntry(address).Tokens > 0); if (getCacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(persistentTable.findSmallest(address)); assert(getCacheEntry(address).Tokens >= 1); - if (getCacheEntry(address).Tokens > N_tokens()) { - out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + out_msg.Tokens := getCacheEntry(address).Tokens - N_tokens; } else { out_msg.Tokens := getCacheEntry(address).Tokens - 1; } @@ -1047,8 +1130,8 @@ machine(L1Cache, "Token protocol") { out_msg.Dirty := getCacheEntry(address).Dirty; out_msg.MessageSize := MessageSizeType:Response_Data; } - if (getCacheEntry(address).Tokens > N_tokens()) { - getCacheEntry(address).Tokens := N_tokens(); + if (getCacheEntry(address).Tokens > N_tokens) { + getCacheEntry(address).Tokens := N_tokens; } else { getCacheEntry(address).Tokens := 1; } @@ -1061,7 +1144,7 @@ machine(L1Cache, "Token protocol") { peek(responseNetwork_in, ResponseMsg) { // assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -1079,7 +1162,8 @@ machine(L1Cache, "Token protocol") { action(h_load_hit, "h", desc="Notify sequencer the load completed.") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); - sequencer.readCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + //sequencer.readCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + sequencer.readCallback(address, getCacheEntry(address).DataBlk); } action(x_external_load_hit, "x", desc="Notify sequencer the load completed.") { @@ -1087,14 +1171,16 @@ machine(L1Cache, "Token protocol") { DEBUG_EXPR(getCacheEntry(address).DataBlk); peek(responseNetwork_in, ResponseMsg) { - sequencer.readCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + //sequencer.readCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + sequencer.readCallback(address, getCacheEntry(address).DataBlk); } } action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); - sequencer.writeCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + //sequencer.writeCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); + sequencer.writeCallback(address, getCacheEntry(address).DataBlk); getCacheEntry(address).Dirty := true; DEBUG_EXPR(getCacheEntry(address).DataBlk); } @@ -1103,7 +1189,8 @@ machine(L1Cache, "Token protocol") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); peek(responseNetwork_in, ResponseMsg) { - sequencer.writeCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + //sequencer.writeCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); + sequencer.writeCallback(address, getCacheEntry(address).DataBlk); } getCacheEntry(address).Dirty := true; DEBUG_EXPR(getCacheEntry(address).DataBlk); @@ -1133,8 +1220,6 @@ machine(L1Cache, "Token protocol") { useTimerTable.unset(address); } - - action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") { mandatoryQueue_in.dequeue(); } @@ -1156,14 +1241,19 @@ machine(L1Cache, "Token protocol") { } action(p_informL2AboutTokenLoss, "p", desc="Inform L2 about loss of all tokens") { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:INV; out_msg.Tokens := 0; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L1Cache; out_msg.DestMachine := MachineType:L2Cache; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Response_Control; } } @@ -1189,13 +1279,25 @@ machine(L1Cache, "Token protocol") { if (L1_TBEs[address].WentPersistent) { // assert(starving == true); outstandingRequests := outstandingRequests - 1; - enqueue(persistentNetwork_out, PersistentMsg, latency="L1_REQUEST_LATENCY") { + enqueue(persistentNetwork_out, PersistentMsg, latency = l1_request_latency) { out_msg.Address := address; out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT; out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.broadcast(MachineType:L1Cache); - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Persistent_Control; } @@ -1217,14 +1319,14 @@ machine(L1Cache, "Token protocol") { // profile_token_retry(address, L1_TBEs[address].AccessType, 1); //} - profile_token_retry(address, L1_TBEs[address].AccessType, L1_TBEs[address].IssueCount); + //profile_token_retry(address, L1_TBEs[address].AccessType, L1_TBEs[address].IssueCount); L1_TBEs.deallocate(address); } action(t_sendAckWithCollectedTokens, "t", desc="Send ack with the tokens we've collected thus far.") { if (getCacheEntry(address).Tokens > 0) { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency = l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -1259,13 +1361,13 @@ machine(L1Cache, "Token protocol") { action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") { if (L1DcacheMemory.isTagPresent(address) == false) { - L1DcacheMemory.allocate(address); + L1DcacheMemory.allocate(address, new Entry); } } action(pp_allocateL1ICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") { if (L1IcacheMemory.isTagPresent(address) == false) { - L1IcacheMemory.allocate(address); + L1IcacheMemory.allocate(address, new Entry); } } @@ -1281,11 +1383,6 @@ machine(L1Cache, "Token protocol") { } } - - action(z_stall, "z", desc="Stall") { - - } - action(zz_recycleMandatoryQueue, "\z", desc="Send the head of the mandatory queue to the back of the queue.") { mandatoryQueue_in.recycle(); } diff --git a/src/mem/protocol/MOESI_CMP_token-L2cache.sm b/src/mem/protocol/MOESI_CMP_token-L2cache.sm index 0a58ed5cf..9a5c400f2 100644 --- a/src/mem/protocol/MOESI_CMP_token-L2cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L2cache.sm @@ -32,20 +32,33 @@ * */ -machine(L2Cache, "Token protocol") { +machine(L2Cache, "Token protocol") + : int l2_request_latency, + int l2_response_latency, + int N_tokens, + bool filtering_enabled +{ // L2 BANK QUEUES // From local bank of L2 cache TO the network - MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="0", ordered="false"; // this L2 bank -> a local L1 - MessageBuffer GlobalRequestFromL2Cache, network="To", virtual_network="1", ordered="false"; // this L2 bank -> mod-directory - MessageBuffer responseFromL2Cache, network="To", virtual_network="2", ordered="false"; // this L2 bank -> a local L1 || mod-directory + + // this L2 bank -> a local L1 || mod-directory + MessageBuffer responseFromL2Cache, network="To", virtual_network="1", ordered="false"; + // this L2 bank -> mod-directory + MessageBuffer GlobalRequestFromL2Cache, network="To", virtual_network="3", ordered="false"; + // this L2 bank -> a local L1 + MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="4", ordered="false"; // FROM the network to this local bank of L2 cache - MessageBuffer L1RequestToL2Cache, network="From", virtual_network="0", ordered="false"; // a local L1 -> this L2 bank - MessageBuffer GlobalRequestToL2Cache, network="From", virtual_network="1", ordered="false"; // mod-directory -> this L2 bank - MessageBuffer responseToL2Cache, network="From", virtual_network="2", ordered="false"; // a local L1 || mod-directory -> this L2 bank - MessageBuffer persistentToL2Cache, network="From", virtual_network="3", ordered="true"; + + // a local L1 || mod-directory -> this L2 bank + MessageBuffer responseToL2Cache, network="From", virtual_network="1", ordered="false"; + MessageBuffer persistentToL2Cache, network="From", virtual_network="2", ordered="true"; + // mod-directory -> this L2 bank + MessageBuffer GlobalRequestToL2Cache, network="From", virtual_network="3", ordered="false"; + // a local L1 -> this L2 bank + MessageBuffer L1RequestToL2Cache, network="From", virtual_network="4", ordered="false"; // STATES enumeration(State, desc="L2 Cache states", default="L2Cache_State_I") { @@ -107,8 +120,6 @@ machine(L2Cache, "Token protocol") { DataBlock DataBlk, desc="data for the block"; } - - structure(DirEntry, desc="...") { Set Sharers, desc="Set of the internal processors that want the block in shared state"; bool exclusive, default="false", desc="if local exclusive is likely"; @@ -117,7 +128,7 @@ machine(L2Cache, "Token protocol") { external_type(CacheMemory) { bool cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); @@ -132,19 +143,28 @@ machine(L2Cache, "Token protocol") { bool isTagPresent(Address); } + external_type(PersistentTable) { + void persistentRequestLock(Address, MachineID, AccessType); + void persistentRequestUnlock(Address, MachineID); + MachineID findSmallest(Address); + AccessType typeOfSmallest(Address); + void markEntries(Address); + bool isLocked(Address); + int countStarvingForAddress(Address); + int countReadStarvingForAddress(Address); + } - CacheMemory L2cacheMemory, template_hack="", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,MachineType_L2Cache,int_to_string(i)+"_L2"'; + CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])'; - PersistentTable persistentTable, constructor_hack="i"; + PersistentTable persistentTable; PerfectCacheMemory localDirectory, template_hack=""; - - bool getFilteringEnabled(); - Entry getL2CacheEntry(Address addr), return_by_ref="yes" { if (L2cacheMemory.isTagPresent(addr)) { return L2cacheMemory[addr]; } + assert(false); + return L2cacheMemory[addr]; } int getTokens(Address addr) { @@ -465,15 +485,21 @@ machine(L2Cache, "Token protocol") { // if this is a retry or no local sharers, broadcast normally // if (in_msg.RetryNum > 0 || (in_msg.Type == CoherenceRequestType:GETX && exclusiveExists(in_msg.Address) == false) || (in_msg.Type == CoherenceRequestType:GETS && sharersExist(in_msg.Address) == false)) { - enqueue(globalRequestNetwork_out, RequestMsg, latency="L2_REQUEST_LATENCY") { + enqueue(globalRequestNetwork_out, RequestMsg, latency=l2_request_latency) { out_msg.Address := in_msg.Address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; out_msg.RequestorMachine := in_msg.RequestorMachine; - //out_msg.Destination.broadcast(MachineType:L2Cache); out_msg.RetryNum := in_msg.RetryNum; - out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); - out_msg.Destination.remove(map_L1CacheMachId_to_L2Cache(address, in_msg.Requestor)); + + // + // If a statically shared L2 cache, then no other L2 caches can + // store the block + // + //out_msg.Destination.broadcast(MachineType:L2Cache); + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + //out_msg.Destination.remove(map_L1CacheMachId_to_L2Cache(address, in_msg.Requestor)); + out_msg.Destination.add(map_Address_to_Directory(address)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.AccessMode := in_msg.AccessMode; @@ -489,7 +515,7 @@ machine(L2Cache, "Token protocol") { action(bb_bounceResponse, "\b", desc="Bounce tokens and data to memory") { peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -505,7 +531,7 @@ machine(L2Cache, "Token protocol") { action(c_cleanReplacement, "c", desc="Issue clean writeback") { if (getL2CacheEntry(address).Tokens > 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -519,7 +545,7 @@ machine(L2Cache, "Token protocol") { } action(cc_dirtyReplacement, "\c", desc="Issue dirty writeback") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L2Cache; @@ -541,22 +567,22 @@ machine(L2Cache, "Token protocol") { action(d_sendDataWithTokens, "d", desc="Send data and a token from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - if (getL2CacheEntry(address).Tokens > N_tokens()) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + if (getL2CacheEntry(address).Tokens > N_tokens) { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L2Cache; out_msg.Destination.add(in_msg.Requestor); - out_msg.Tokens := N_tokens(); + out_msg.Tokens := N_tokens; out_msg.DataBlk := getL2CacheEntry(address).DataBlk; out_msg.Dirty := false; out_msg.MessageSize := MessageSizeType:Response_Data; } - getL2CacheEntry(address).Tokens := getL2CacheEntry(address).Tokens - N_tokens(); + getL2CacheEntry(address).Tokens := getL2CacheEntry(address).Tokens - N_tokens; } else { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -574,7 +600,7 @@ machine(L2Cache, "Token protocol") { action(dd_sendDataWithAllTokens, "\d", desc="Send data and all tokens from cache to requestor") { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -592,7 +618,7 @@ machine(L2Cache, "Token protocol") { action(e_sendAckWithCollectedTokens, "e", desc="Send ack with the tokens we've collected thus far.") { if (getL2CacheEntry(address).Tokens > 0) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -607,7 +633,7 @@ machine(L2Cache, "Token protocol") { } action(ee_sendDataWithAllTokens, "\e", desc="Send data and all tokens from cache to starver") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -626,7 +652,7 @@ machine(L2Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getL2CacheEntry(address).Tokens > 0); if (getL2CacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -644,7 +670,7 @@ machine(L2Cache, "Token protocol") { //assert(persistentTable.findSmallest(address) != id); // Make sure we never bounce tokens to ourself assert(getL2CacheEntry(address).Tokens > 0); if (getL2CacheEntry(address).Tokens > 1) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -666,7 +692,7 @@ machine(L2Cache, "Token protocol") { // assert(persistentTable.isLocked(address)); peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -684,7 +710,7 @@ machine(L2Cache, "Token protocol") { //assert(persistentTable.isLocked(address)); peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; if (in_msg.Type == CoherenceResponseType:WB_SHARED_DATA) { out_msg.Type := CoherenceResponseType:DATA_SHARED; @@ -706,7 +732,7 @@ machine(L2Cache, "Token protocol") { // assert(persistentTable.isLocked(address)); peek(responseNetwork_in, ResponseMsg) { // FIXME, should use a 3rd vnet in some cases - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -729,24 +755,31 @@ machine(L2Cache, "Token protocol") { action(j_forwardTransientRequestToLocalSharers, "j", desc="Forward external transient request to local sharers") { peek(requestNetwork_in, RequestMsg) { - if (getFilteringEnabled() == true && in_msg.RetryNum == 0 && sharersExist(in_msg.Address) == false) { - profile_filter_action(1); + if (filtering_enabled == true && in_msg.RetryNum == 0 && sharersExist(in_msg.Address) == false) { + //profile_filter_action(1); DEBUG_EXPR("filtered message"); DEBUG_EXPR(in_msg.RetryNum); } else { - enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) { + enqueue(localRequestNetwork_out, RequestMsg, latency=l2_response_latency ) { out_msg.Address := in_msg.Address; out_msg.Requestor := in_msg.Requestor; out_msg.RequestorMachine := in_msg.RequestorMachine; - out_msg.Destination := getLocalL1IDs(machineID); + + // + // Currently assuming only one chip so all L1s are local + // + //out_msg.Destination := getLocalL1IDs(machineID); + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.remove(in_msg.Requestor); + out_msg.Type := in_msg.Type; out_msg.isLocal := false; out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.AccessMode := in_msg.AccessMode; out_msg.Prefetch := in_msg.Prefetch; } - profile_filter_action(0); + //profile_filter_action(0); } } } @@ -756,7 +789,7 @@ machine(L2Cache, "Token protocol") { peek(L1requestNetwork_in, RequestMsg) { assert(getL2CacheEntry(address).Tokens > 0); //enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_to_L1_RESPONSE_LATENCY") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_SHARED; out_msg.Sender := machineID; @@ -774,7 +807,7 @@ machine(L2Cache, "Token protocol") { action(k_dataOwnerFromL2CacheToL1Requestor, "\k", desc="Send data and a token from cache to L1 requestor") { peek(L1requestNetwork_in, RequestMsg) { assert(getL2CacheEntry(address).Tokens > 0); - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -793,7 +826,7 @@ machine(L2Cache, "Token protocol") { peek(L1requestNetwork_in, RequestMsg) { // assert(getL2CacheEntry(address).Tokens == max_tokens()); //enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_to_L1_RESPONSE_LATENCY") { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -840,12 +873,13 @@ machine(L2Cache, "Token protocol") { } action(r_markNewSharer, "r", desc="Mark the new local sharer from local request message") { - peek(L1requestNetwork_in, RequestMsg) { - if (in_msg.Type == CoherenceRequestType:GETX) { - setNewWriter(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); - } else if (in_msg.Type == CoherenceRequestType:GETS) { - addNewSharer(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); + if (machineIDToMachineType(in_msg.Requestor) == MachineType:L1Cache) { + if (in_msg.Type == CoherenceRequestType:GETX) { + setNewWriter(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); + } else if (in_msg.Type == CoherenceRequestType:GETS) { + addNewSharer(in_msg.Address, machineIDToNodeID(in_msg.Requestor)); + } } } } @@ -854,16 +888,19 @@ machine(L2Cache, "Token protocol") { clearExclusiveBitIfExists(address); } - action( r_setMRU, "\rr", desc="manually set the MRU bit for cache line" ) { - if(isCacheTagPresent(address)) { - L2cacheMemory.setMRU(address); + action(r_setMRU, "\rr", desc="manually set the MRU bit for cache line" ) { + peek(L1requestNetwork_in, RequestMsg) { + if ((machineIDToMachineType(in_msg.Requestor) == MachineType:L1Cache) && + (isCacheTagPresent(address))) { + L2cacheMemory.setMRU(address); + } } } action(t_sendAckWithCollectedTokens, "t", desc="Send ack with the tokens we've collected thus far.") { if (getL2CacheEntry(address).Tokens > 0) { peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -881,7 +918,7 @@ machine(L2Cache, "Token protocol") { action(tt_sendLocalAckWithCollectedTokens, "tt", desc="Send ack with the tokens we've collected thus far.") { if (getL2CacheEntry(address).Tokens > 0) { peek(L1requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -906,19 +943,19 @@ machine(L2Cache, "Token protocol") { } action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") { - L2cacheMemory.allocate(address); + L2cacheMemory.allocate(address, new Entry); } action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") { L2cacheMemory.deallocate(address); } - action(uu_profileMiss, "\u", desc="Profile the demand miss") { - peek(L1requestNetwork_in, RequestMsg) { + //action(uu_profileMiss, "\u", desc="Profile the demand miss") { + // peek(L1requestNetwork_in, RequestMsg) { // AccessModeType not implemented //profile_L2Cache_miss(convertToGenericType(in_msg.Type), in_msg.AccessMode, MessageSizeTypeToInt(in_msg.MessageSize), in_msg.Prefetch, machineIDToNodeID(in_msg.Requestor)); - } - } + // } + //} action(w_assertIncomingDataAndCacheDataMatch, "w", desc="Assert that the incoming data and the data in the cache match") { @@ -927,11 +964,6 @@ machine(L2Cache, "Token protocol") { } } - action(z_stall, "z", desc="Stall") { - } - - - //***************************************************** // TRANSITIONS @@ -961,7 +993,7 @@ machine(L2Cache, "Token protocol") { transition(NP, {L1_GETS, L1_GETX}) { a_broadcastLocalRequest; r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1012,7 +1044,7 @@ machine(L2Cache, "Token protocol") { a_broadcastLocalRequest; tt_sendLocalAckWithCollectedTokens; // send any tokens we have collected r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1020,7 +1052,7 @@ machine(L2Cache, "Token protocol") { a_broadcastLocalRequest; tt_sendLocalAckWithCollectedTokens; // send any tokens we have collected r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1181,7 +1213,7 @@ machine(L2Cache, "Token protocol") { tt_sendLocalAckWithCollectedTokens; r_markNewSharer; r_setMRU; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1294,7 +1326,7 @@ machine(L2Cache, "Token protocol") { k_dataAndAllTokensFromL2CacheToL1Requestor; r_markNewSharer; r_setMRU; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1382,7 +1414,7 @@ machine(L2Cache, "Token protocol") { transition(I_L, {L1_GETX, L1_GETS}) { a_broadcastLocalRequest; r_markNewSharer; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } @@ -1391,7 +1423,7 @@ machine(L2Cache, "Token protocol") { tt_sendLocalAckWithCollectedTokens; r_markNewSharer; r_setMRU; - uu_profileMiss; + //uu_profileMiss; o_popL1RequestQueue; } diff --git a/src/mem/protocol/MOESI_CMP_token-dir.sm b/src/mem/protocol/MOESI_CMP_token-dir.sm index 1592fd123..7925a8fe0 100644 --- a/src/mem/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/protocol/MOESI_CMP_token-dir.sm @@ -32,14 +32,23 @@ */ -machine(Directory, "Token protocol") { - - MessageBuffer requestFromDir, network="To", virtual_network="1", ordered="false"; - MessageBuffer responseFromDir, network="To", virtual_network="2", ordered="false"; - - MessageBuffer persistentToDir, network="From", virtual_network="3", ordered="true"; - MessageBuffer requestToDir, network="From", virtual_network="1", ordered="false"; - MessageBuffer responseToDir, network="From", virtual_network="2", ordered="false"; +machine(Directory, "Token protocol") + : int directory_latency, + int l2_select_low_bit, + int l2_select_num_bits, + bool distributed_persistent, + int fixed_timeout_latency +{ + + MessageBuffer dmaResponseFromDir, network="To", virtual_network="0", ordered="true"; + MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; + MessageBuffer persistentFromDir, network="To", virtual_network="2", ordered="true"; + MessageBuffer requestFromDir, network="To", virtual_network="4", ordered="false"; + + MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false"; + MessageBuffer persistentToDir, network="From", virtual_network="2", ordered="true"; + MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; + MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; // STATES enumeration(State, desc="Directory states", default="Directory_State_O") { @@ -47,6 +56,24 @@ machine(Directory, "Token protocol") { O, desc="Owner"; NO, desc="Not Owner"; L, desc="Locked"; + + // Memory wait states - can block all messages including persistent requests + O_W, desc="transitioning to Owner, waiting for memory write"; + L_W, desc="transitioning to Locked, waiting for memory read"; + DR_L_W, desc="transitioning to Locked underneath a DMA read, waiting for memory data"; + NO_W, desc="transitioning to Not Owner, waiting for memory read"; + O_DW_W, desc="transitioning to Owner, waiting for memory before DMA ack"; + O_DR_W, desc="transitioning to Owner, waiting for memory before DMA data"; + + // DMA request transient states - must respond to persistent requests + O_DW, desc="issued GETX for DMA write, waiting for all tokens"; + NO_DW, desc="issued GETX for DMA write, waiting for all tokens"; + NO_DR, desc="issued GETS for DMA read, waiting for data"; + + // DMA request in progress - competing with a CPU persistent request + DW_L, desc="issued GETX for DMA write, CPU persistent request must complete first"; + DR_L, desc="issued GETS for DMA read, CPU persistent request must complete first"; + } // Events @@ -55,9 +82,23 @@ machine(Directory, "Token protocol") { GETS, desc="A GETS arrives"; Lockdown, desc="A lockdown request arrives"; Unlockdown, desc="An un-lockdown request arrives"; + Own_Lock_or_Unlock, desc="own lock or unlock"; Data_Owner, desc="Data arrive"; + Data_All_Tokens, desc="Data and all tokens"; Ack_Owner, desc="Owner token arrived without data because it was clean"; + Ack_Owner_All_Tokens, desc="All tokens including owner arrived without data because it was clean"; Tokens, desc="Tokens arrive"; + Ack_All_Tokens, desc="All_Tokens arrive"; + Request_Timeout, desc="A DMA request has timed out"; + + // Memory Controller + Memory_Data, desc="Fetched data from memory arrives"; + Memory_Ack, desc="Writeback Ack from memory arrives"; + + // DMA requests + DMA_READ, desc="A DMA Read memory request"; + DMA_WRITE, desc="A DMA Write memory request"; + DMA_WRITE_All_Tokens, desc="A DMA Write memory request, directory has all tokens"; } // TYPES @@ -73,7 +114,7 @@ machine(Directory, "Token protocol") { // is 'soft state' that does not need to be correct (as long as // you're eventually willing to resort to broadcast.) - Set Owner, desc="Probable Owner of the line. More accurately, the set of processors who need to see a GetS or GetO. We use a Set for convenience, but only one bit is set at a time."; + Set Owner, desc="Probable Owner of the line. More accurately, the set of processors who need to see a GetS or GetO. We use a Set for convenience, but only one bit is set at a time."; Set Sharers, desc="Probable sharers of the line. More accurately, the set of processors who need to see a GetX"; } @@ -82,23 +123,70 @@ machine(Directory, "Token protocol") { bool isPresent(Address); } + external_type(MemoryControl, inport="yes", outport="yes") { + + } + + external_type(PersistentTable) { + void persistentRequestLock(Address, MachineID, AccessType); + void persistentRequestUnlock(Address, MachineID); + bool okToIssueStarving(Address, MachineID); + MachineID findSmallest(Address); + AccessType typeOfSmallest(Address); + void markEntries(Address); + bool isLocked(Address); + int countStarvingForAddress(Address); + int countReadStarvingForAddress(Address); + } + + // TBE entries for DMA requests + structure(TBE, desc="TBE entries for outstanding DMA requests") { + Address PhysicalAddress, desc="physical address"; + State TBEState, desc="Transient State"; + DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory"; + DataBlock DataBlk, desc="The current view of system memory"; + int Len, desc="..."; + MachineID DmaRequestor, desc="DMA requestor"; + bool WentPersistent, desc="Did the DMA request require a persistent request"; + } + + external_type(TBETable) { + TBE lookup(Address); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } // ** OBJECTS ** - DirectoryMemory directory, constructor_hack="i"; + DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])'; + + MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])'; - PersistentTable persistentTable, constructor_hack="i"; + PersistentTable persistentTable; + TimerTable reissueTimerTable; + + TBETable TBEs, template_hack=""; + + bool starving, default="false"; State getState(Address addr) { - return directory[addr].DirectoryState; + if (TBEs.isPresent(addr)) { + return TBEs[addr].TBEState; + } else { + return directory[addr].DirectoryState; + } } void setState(Address addr, State state) { + if (TBEs.isPresent(addr)) { + TBEs[addr].TBEState := state; + } directory[addr].DirectoryState := state; if (state == State:L) { assert(directory[addr].Tokens == 0); - } + } // We have one or zero owners assert((directory[addr].Owner.count() == 0) || (directory[addr].Owner.count() == 1)); @@ -112,19 +200,90 @@ machine(Directory, "Token protocol") { // assert(directory[addr].Tokens >= (max_tokens() / 2)); // Only mostly true; this might not always hold } } + + bool okToIssueStarving(Address addr, MachineID machinID) { + return persistentTable.okToIssueStarving(addr, machineID); + } + + void markPersistentEntries(Address addr) { + persistentTable.markEntries(addr); + } // ** OUT_PORTS ** out_port(responseNetwork_out, ResponseMsg, responseFromDir); + out_port(persistentNetwork_out, PersistentMsg, persistentFromDir); out_port(requestNetwork_out, RequestMsg, requestFromDir); + out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaResponseFromDir); + + // + // Memory buffer for memory controller to DIMM communication + // + out_port(memQueue_out, MemoryMsg, memBuffer); // ** IN_PORTS ** + + // off-chip memory request/response is done + in_port(memQueue_in, MemoryMsg, memBuffer) { + if (memQueue_in.isReady()) { + peek(memQueue_in, MemoryMsg) { + if (in_msg.Type == MemoryRequestType:MEMORY_READ) { + trigger(Event:Memory_Data, in_msg.Address); + } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) { + trigger(Event:Memory_Ack, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } + } + } + + // Reissue Timer + in_port(reissueTimerTable_in, Address, reissueTimerTable) { + if (reissueTimerTable_in.isReady()) { + trigger(Event:Request_Timeout, reissueTimerTable.readyAddress()); + } + } + + in_port(responseNetwork_in, ResponseMsg, responseToDir) { + if (responseNetwork_in.isReady()) { + peek(responseNetwork_in, ResponseMsg) { + assert(in_msg.Destination.isElement(machineID)); + if (directory[in_msg.Address].Tokens + in_msg.Tokens == max_tokens()) { + if ((in_msg.Type == CoherenceResponseType:DATA_OWNER) || + (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { + trigger(Event:Data_All_Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { + trigger(Event:Ack_Owner_All_Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:Ack_All_Tokens, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } else { + if (in_msg.Type == CoherenceResponseType:DATA_OWNER) { + trigger(Event:Data_Owner, in_msg.Address); + } else if ((in_msg.Type == CoherenceResponseType:ACK) || + (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { + trigger(Event:Tokens, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { + trigger(Event:Ack_Owner, in_msg.Address); + } else { + DEBUG_EXPR(in_msg.Type); + error("Invalid message"); + } + } + } + } + } in_port(persistentNetwork_in, PersistentMsg, persistentToDir) { if (persistentNetwork_in.isReady()) { peek(persistentNetwork_in, PersistentMsg) { assert(in_msg.Destination.isElement(machineID)); - if (distributedPersistentEnabled()) { + if (distributed_persistent) { // Apply the lockdown or unlockdown message to the table if (in_msg.Type == PersistentRequestType:GETX_PERSISTENT) { persistentTable.persistentRequestLock(in_msg.Address, in_msg.Requestor, AccessType:Write); @@ -173,19 +332,18 @@ machine(Directory, "Token protocol") { } } - in_port(responseNetwork_in, ResponseMsg, responseToDir) { - if (responseNetwork_in.isReady()) { - peek(responseNetwork_in, ResponseMsg) { - assert(in_msg.Destination.isElement(machineID)); - if (in_msg.Type == CoherenceResponseType:DATA_OWNER) { - trigger(Event:Data_Owner, in_msg.Address); - } else if ((in_msg.Type == CoherenceResponseType:ACK) || - (in_msg.Type == CoherenceResponseType:DATA_SHARED)) { - trigger(Event:Tokens, in_msg.Address); - } else if (in_msg.Type == CoherenceResponseType:ACK_OWNER) { - trigger(Event:Ack_Owner, in_msg.Address); + in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, DMARequestMsg) { + if (in_msg.Type == DMARequestType:READ) { + trigger(Event:DMA_READ, in_msg.LineAddress); + } else if (in_msg.Type == DMARequestType:WRITE) { + if (directory[in_msg.LineAddress].Tokens == max_tokens()) { + trigger(Event:DMA_WRITE_All_Tokens, in_msg.LineAddress); + } else { + trigger(Event:DMA_WRITE, in_msg.LineAddress); + } } else { - DEBUG_EXPR(in_msg.Type); error("Invalid message"); } } @@ -199,7 +357,7 @@ machine(Directory, "Token protocol") { if (directory[address].Tokens > 0) { peek(requestNetwork_in, RequestMsg) { // enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_CACHE_LATENCY") {// FIXME? - enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_LATENCY") {// FIXME? + enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {// FIXME? out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -213,11 +371,151 @@ machine(Directory, "Token protocol") { } } + action(px_tryIssuingPersistentGETXRequest, "px", desc="...") { + if (okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := PersistentRequestType:GETX_PERSISTENT; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + out_msg.Destination.broadcast(MachineType:L1Cache); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + markPersistentEntries(address); + starving := true; + + TBEs[address].WentPersistent := true; + + // Do not schedule a wakeup, a persistent requests will always complete + } else { + + // We'd like to issue a persistent request, but are not allowed + // to issue a P.R. right now. This, we do not increment the + // IssueCount. + + // Set a wakeup timer + reissueTimerTable.set(address, 10); + } + } + + action(bw_broadcastWrite, "bw", desc="Broadcast GETX if we need tokens") { + peek(dmaRequestQueue_in, DMARequestMsg) { + // + // Assser that we only send message if we don't already have all the tokens + // + assert(directory[address].Tokens != max_tokens()); + enqueue(requestNetwork_out, RequestMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETX; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + + // + // Since only one chip, assuming all L1 caches are local + // + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.RetryNum := 0; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + } + } + + action(ps_tryIssuingPersistentGETSRequest, "ps", desc="...") { + if (okToIssueStarving(address, machineID) && (starving == false)) { + enqueue(persistentNetwork_out, PersistentMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := PersistentRequestType:GETS_PERSISTENT; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + out_msg.Destination.broadcast(MachineType:L1Cache); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + markPersistentEntries(address); + starving := true; + + TBEs[address].WentPersistent := true; + + // Do not schedule a wakeup, a persistent requests will always complete + } else { + + // We'd like to issue a persistent request, but are not allowed + // to issue a P.R. right now. This, we do not increment the + // IssueCount. + + // Set a wakeup timer + reissueTimerTable.set(address, 10); + } + } + + action(br_broadcastRead, "br", desc="Broadcast GETS for data") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(requestNetwork_out, RequestMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETS; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + + // + // Since only one chip, assuming all L1 caches are local + // + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.RetryNum := 0; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Prefetch := PrefetchBit:No; + out_msg.AccessMode := AccessModeType:SupervisorMode; + } + } + } + action(aa_sendTokensToStarver, "\a", desc="Send tokens to starver") { // Only send a message if we have tokens to send if (directory[address].Tokens > 0) { // enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_CACHE_LATENCY") {// FIXME? - enqueue(responseNetwork_out, ResponseMsg, latency="DIRECTORY_LATENCY") {// FIXME? + enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {// FIXME? out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -230,14 +528,14 @@ machine(Directory, "Token protocol") { } } - action(d_sendDataWithAllTokens, "d", desc="Send data and tokens to requestor") { - peek(requestNetwork_in, RequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { + action(d_sendMemoryDataWithAllTokens, "d", desc="Send data and tokens to requestor") { + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:Directory; - out_msg.Destination.add(in_msg.Requestor); + out_msg.Destination.add(in_msg.OriginalRequestorMachId); assert(directory[address].Tokens > 0); out_msg.Tokens := directory[in_msg.Address].Tokens; out_msg.DataBlk := directory[in_msg.Address].DataBlk; @@ -249,21 +547,140 @@ machine(Directory, "Token protocol") { } action(dd_sendDataWithAllTokensToStarver, "\d", desc="Send data and tokens to starver") { - enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") { - out_msg.Address := address; - out_msg.Type := CoherenceResponseType:DATA_OWNER; - out_msg.Sender := machineID; - out_msg.SenderMachine := MachineType:Directory; - out_msg.Destination.add(persistentTable.findSmallest(address)); - assert(directory[address].Tokens > 0); - out_msg.Tokens := directory[address].Tokens; - out_msg.DataBlk := directory[address].DataBlk; - out_msg.Dirty := false; - out_msg.MessageSize := MessageSizeType:Response_Data; + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA_OWNER; + out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:Directory; + out_msg.Destination.add(persistentTable.findSmallest(address)); + assert(directory[address].Tokens > 0); + out_msg.Tokens := directory[address].Tokens; + out_msg.DataBlk := directory[address].DataBlk; + out_msg.Dirty := false; + out_msg.MessageSize := MessageSizeType:Response_Data; + } } directory[address].Tokens := 0; } + action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { + peek(requestNetwork_in, RequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } + + action(fd_memoryDma, "fd", desc="Queue off-chip fetch request") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } + + action(lq_queueMemoryWbRequest, "lq", desc="Write data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + DEBUG_EXPR(out_msg); + } + } + + action(ld_queueMemoryDmaWriteFromTbe, "ld", desc="Write DMA data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + // first, initialize the data blk to the current version of system memory + out_msg.DataBlk := TBEs[address].DataBlk; + // then add the dma write data + out_msg.DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + DEBUG_EXPR(out_msg); + } + } + + action(lr_queueMemoryDmaReadWriteback, "lr", desc="Write DMA data from read to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + // first, initialize the data blk to the current version of system memory + out_msg.DataBlk := TBEs[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + + action(vd_allocateDmaRequestInTBE, "vd", desc="Record Data in TBE") { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + TBEs[address].DmaDataBlk := in_msg.DataBlk; + TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; + TBEs[address].Len := in_msg.Len; + TBEs[address].DmaRequestor := in_msg.Requestor; + TBEs[address].WentPersistent := false; + } + } + + action(s_deallocateTBE, "s", desc="Deallocate TBE") { + + if (TBEs[address].WentPersistent) { + assert(starving == true); + + enqueue(persistentNetwork_out, PersistentMsg, latency = "1") { + out_msg.Address := address; + out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:Directory; + out_msg.Destination.broadcast(MachineType:L1Cache); + + // + // Currently the configuration system limits the system to only one + // chip. Therefore, if we assume one shared L2 cache, then only one + // pertinent L2 cache exist. + // + //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; + } + starving := false; + } + + TBEs.deallocate(address); + } + + action(rd_recordDataInTbe, "rd", desc="Record data in TBE") { + peek(responseNetwork_in, ResponseMsg) { + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + + action(cd_writeCleanDataToTbe, "cd", desc="Write clean memory data to TBE") { + TBEs[address].DataBlk := directory[address].DataBlk; + } + + action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") { + directory[address].DataBlk := TBEs[address].DataBlk; + directory[address].DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + } + action(f_incrementTokens, "f", desc="Increment the number of tokens we're tracking") { peek(responseNetwork_in, ResponseMsg) { assert(in_msg.Tokens >= 1); @@ -275,14 +692,34 @@ machine(Directory, "Token protocol") { requestNetwork_in.dequeue(); } + action(z_recycleRequest, "z", desc="Recycle the request queue") { + requestNetwork_in.recycle(); + } + action(k_popIncomingResponseQueue, "k", desc="Pop incoming response queue") { responseNetwork_in.dequeue(); } + action(kz_recycleResponse, "kz", desc="Recycle incoming response queue") { + responseNetwork_in.recycle(); + } + action(l_popIncomingPersistentQueue, "l", desc="Pop incoming persistent queue") { persistentNetwork_in.dequeue(); } + action(p_popDmaRequestQueue, "pd", desc="pop dma request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(y_recycleDmaRequestQueue, "y", desc="recycle dma request queue") { + dmaRequestQueue_in.recycle(); + } + + action(l_popMemQueue, "q", desc="Pop off-chip request queue") { + memQueue_in.dequeue(); + } + action(m_writeDataToMemory, "m", desc="Write dirty writeback to memory") { peek(responseNetwork_in, ResponseMsg) { directory[in_msg.Address].DataBlk := in_msg.DataBlk; @@ -291,18 +728,15 @@ machine(Directory, "Token protocol") { } } - action(n_checkIncomingMsg, "n", desc="Check incoming token message") { + action(n_checkData, "n", desc="Check incoming clean data message") { peek(responseNetwork_in, ResponseMsg) { - assert(in_msg.Type == CoherenceResponseType:ACK_OWNER); - assert(in_msg.Dirty == false); - assert(in_msg.MessageSize == MessageSizeType:Writeback_Control); assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk); } } action(r_bounceResponse, "r", desc="Bounce response to starving processor") { peek(responseNetwork_in, ResponseMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; @@ -316,7 +750,20 @@ machine(Directory, "Token protocol") { } } - action(s_bounceDatalessOwnerToken, "s", desc="Bounce clean owner token to starving processor") { + action(st_scheduleTimeout, "st", desc="Schedule Timeout") { + // + // currently only support a fixed timeout latency + // + reissueTimerTable.set(address, fixed_timeout_latency); + } + + action(ut_unsetReissueTimer, "ut", desc="Unset reissue timer.") { + if (reissueTimerTable.isSet(address)) { + reissueTimerTable.unset(address); + } + } + + action(bd_bounceDatalessOwnerToken, "bd", desc="Bounce clean owner token to starving processor") { peek(responseNetwork_in, ResponseMsg) { assert(in_msg.Type == CoherenceResponseType:ACK_OWNER); assert(in_msg.Dirty == false); @@ -331,7 +778,7 @@ machine(Directory, "Token protocol") { // Bounce the message, but "re-associate" the data and the owner // token. In essence we're converting an ACK_OWNER message to a // DATA_OWNER message, keeping the number of tokens the same. - enqueue(responseNetwork_out, ResponseMsg, latency="NULL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_OWNER; out_msg.Sender := machineID; @@ -346,53 +793,212 @@ machine(Directory, "Token protocol") { } } + action(da_sendDmaAck, "da", desc="Send Ack to DMA controller") { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:ACK; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + + action(dm_sendMemoryDataToDma, "dm", desc="Send Data to DMA controller from memory") { + peek(memQueue_in, MemoryMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(dd_sendDmaData, "dd", desc="Send Data to DMA controller") { + peek(responseNetwork_in, ResponseMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } // TRANSITIONS - // Trans. from O - transition(O, GETX, NO) { - d_sendDataWithAllTokens; + // + // Trans. from base state O + // the directory has valid data + // + transition(O, GETX, NO_W) { + qf_queueMemoryFetchRequest; j_popIncomingRequestQueue; } - transition(O, GETS, NO) { - d_sendDataWithAllTokens; + transition(O, DMA_WRITE, O_DW) { + vd_allocateDmaRequestInTBE; + bw_broadcastWrite; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + + transition(O, DMA_WRITE_All_Tokens, O_DW_W) { + vd_allocateDmaRequestInTBE; + cd_writeCleanDataToTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + p_popDmaRequestQueue; + } + + transition(O, GETS, NO_W) { + qf_queueMemoryFetchRequest; j_popIncomingRequestQueue; } - transition(O, Lockdown, L) { - dd_sendDataWithAllTokensToStarver; + transition(O, DMA_READ, O_DR_W) { + vd_allocateDmaRequestInTBE; + fd_memoryDma; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + + transition(O, Lockdown, L_W) { + qf_queueMemoryFetchRequest; + l_popIncomingPersistentQueue; + } + + transition(O, {Tokens, Ack_All_Tokens}) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + transition(O, {Data_Owner, Data_All_Tokens}) { + n_checkData; + f_incrementTokens; + k_popIncomingResponseQueue; + } + + // + // transitioning to Owner, waiting for memory before DMA ack + // All other events should recycle/stall + // + transition(O_DR_W, Memory_Data, O) { + dm_sendMemoryDataToDma; + ut_unsetReissueTimer; + s_deallocateTBE; + l_popMemQueue; + } + + // + // issued GETX for DMA write, waiting for all tokens + // + transition(O_DW, Tokens) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + transition(O_DW, Data_Owner) { + f_incrementTokens; + rd_recordDataInTbe; + k_popIncomingResponseQueue; + } + + transition(O_DW, Ack_Owner) { + f_incrementTokens; + cd_writeCleanDataToTbe; + k_popIncomingResponseQueue; + } + + transition(O_DW, Lockdown, DW_L) { l_popIncomingPersistentQueue; } - transition(O, Tokens) { + transition({NO_DW, O_DW}, Data_All_Tokens, O_DW_W) { f_incrementTokens; + rd_recordDataInTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; k_popIncomingResponseQueue; } + transition(O_DW, Ack_All_Tokens, O_DW_W) { + f_incrementTokens; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; + k_popIncomingResponseQueue; + } + + transition(O_DW, Ack_Owner_All_Tokens, O_DW_W) { + f_incrementTokens; + cd_writeCleanDataToTbe; + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWriteFromTbe; + ut_unsetReissueTimer; + k_popIncomingResponseQueue; + } + + transition(O_DW_W, Memory_Ack, O) { + da_sendDmaAck; + s_deallocateTBE; + l_popMemQueue; + } + + // // Trans. from NO + // The direcotry does not have valid data, but may have some tokens + // transition(NO, GETX) { a_sendTokens; j_popIncomingRequestQueue; } + transition(NO, DMA_WRITE, NO_DW) { + vd_allocateDmaRequestInTBE; + bw_broadcastWrite; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + transition(NO, GETS) { j_popIncomingRequestQueue; } + transition(NO, DMA_READ, NO_DR) { + vd_allocateDmaRequestInTBE; + br_broadcastRead; + st_scheduleTimeout; + p_popDmaRequestQueue; + } + transition(NO, Lockdown, L) { aa_sendTokensToStarver; l_popIncomingPersistentQueue; } - transition(NO, Data_Owner, O) { + transition(NO, {Data_Owner, Data_All_Tokens}, O_W) { m_writeDataToMemory; f_incrementTokens; + lq_queueMemoryWbRequest; k_popIncomingResponseQueue; } - transition(NO, Ack_Owner, O) { - n_checkIncomingMsg; + transition(NO, {Ack_Owner, Ack_Owner_All_Tokens}, O) { + n_checkData; f_incrementTokens; k_popIncomingResponseQueue; } @@ -402,34 +1008,156 @@ machine(Directory, "Token protocol") { k_popIncomingResponseQueue; } + transition(NO_W, Memory_Data, NO) { + d_sendMemoryDataWithAllTokens; + l_popMemQueue; + } + + // Trans. from NO_DW + transition(NO_DW, Request_Timeout) { + ut_unsetReissueTimer; + px_tryIssuingPersistentGETXRequest; + } + + transition(NO_DW, Lockdown, DW_L) { + aa_sendTokensToStarver; + l_popIncomingPersistentQueue; + } + + // Note: NO_DW, Data_All_Tokens transition is combined with O_DW + // Note: NO_DW should not receive the action Ack_All_Tokens because the + // directory does not have valid data + + transition(NO_DW, Data_Owner, O_DW) { + f_incrementTokens; + rd_recordDataInTbe; + lq_queueMemoryWbRequest; + k_popIncomingResponseQueue; + } + + transition({NO_DW, NO_DR}, Tokens) { + f_incrementTokens; + k_popIncomingResponseQueue; + } + + // Trans. from NO_DR + transition(NO_DR, Request_Timeout) { + ut_unsetReissueTimer; + ps_tryIssuingPersistentGETSRequest; + } + + transition(NO_DR, Lockdown, DR_L) { + aa_sendTokensToStarver; + l_popIncomingPersistentQueue; + } + + transition(NO_DR, {Data_Owner, Data_All_Tokens}, O_W) { + m_writeDataToMemory; + f_incrementTokens; + dd_sendDmaData; + lr_queueMemoryDmaReadWriteback; + ut_unsetReissueTimer; + s_deallocateTBE; + k_popIncomingResponseQueue; + } + // Trans. from L - transition(L, {GETX, GETS}) { + transition({L, DW_L, DR_L}, {GETX, GETS}) { j_popIncomingRequestQueue; } - transition(L, Lockdown) { + transition({L, DW_L, DR_L, L_W, DR_L_W}, Lockdown) { l_popIncomingPersistentQueue; } - // we could change this to write the data to memory and send it cleanly - transition(L, Data_Owner) { + // + // Received data for lockdown blocks + // For blocks with outstanding dma requests to them + // ...we could change this to write the data to memory and send it cleanly + // ...we could also proactively complete our DMA requests + // However, to keep my mind from spinning out-of-control, we won't for now :) + // + transition({DW_L, DR_L, L}, {Data_Owner, Data_All_Tokens}) { r_bounceResponse; k_popIncomingResponseQueue; } - transition(L, Tokens) { + transition({DW_L, DR_L, L}, Tokens) { r_bounceResponse; k_popIncomingResponseQueue; } - transition(L, Ack_Owner) { - s_bounceDatalessOwnerToken; + transition({DW_L, DR_L, L}, {Ack_Owner_All_Tokens, Ack_Owner}) { + bd_bounceDatalessOwnerToken; k_popIncomingResponseQueue; } - transition(L, Unlockdown, NO) { l_popIncomingPersistentQueue; } + transition(L_W, Memory_Data, L) { + dd_sendDataWithAllTokensToStarver; + l_popMemQueue; + } + + transition(DR_L_W, Memory_Data, DR_L) { + dd_sendDataWithAllTokensToStarver; + l_popMemQueue; + } + + transition(DW_L, {Unlockdown, Own_Lock_or_Unlock}, NO_DW) { + l_popIncomingPersistentQueue; + } + + transition(DR_L_W, {Unlockdown, Own_Lock_or_Unlock}, O_DR_W) { + l_popIncomingPersistentQueue; + } + + transition({DW_L, DR_L_W}, Request_Timeout) { + ut_unsetReissueTimer; + px_tryIssuingPersistentGETXRequest; + } + + transition(DR_L, {Unlockdown, Own_Lock_or_Unlock}, NO_DR) { + l_popIncomingPersistentQueue; + } + + transition(DR_L, Request_Timeout) { + ut_unsetReissueTimer; + ps_tryIssuingPersistentGETSRequest; + } + + transition(O_W, Memory_Ack, O) { + l_popMemQueue; + } + + transition({O, NO, L, O_DW, NO_DW, NO_DR}, Own_Lock_or_Unlock) { + l_popIncomingPersistentQueue; + } + + // Blocked states + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W, O_DW, NO_DW, NO_DR}, {GETX, GETS}) { + z_recycleRequest; + } + + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W, O_DW, NO_DW, NO_DR, L, DW_L, DR_L}, {DMA_READ, DMA_WRITE}) { + y_recycleDmaRequestQueue; + } + + transition({NO_W, O_W, L_W, DR_L_W, O_DW_W, O_DR_W}, {Data_Owner, Ack_Owner, Tokens}) { + kz_recycleResponse; + } + + transition({NO_W, O_W}, Lockdown, L_W) { + l_popIncomingPersistentQueue; + } + + transition(O_DR_W, Lockdown, DR_L_W) { + l_popIncomingPersistentQueue; + } + + transition({NO_W, O_W, O_DR_W}, {Unlockdown, Own_Lock_or_Unlock}) { + l_popIncomingPersistentQueue; + } } diff --git a/src/mem/protocol/MOESI_CMP_token-dma.sm b/src/mem/protocol/MOESI_CMP_token-dma.sm new file mode 100644 index 000000000..550a36ae0 --- /dev/null +++ b/src/mem/protocol/MOESI_CMP_token-dma.sm @@ -0,0 +1,165 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +machine(DMA, "DMA Controller") +: int request_latency +{ + + MessageBuffer responseFromDir, network="From", virtual_network="0", ordered="true", no_vector="true"; + MessageBuffer reqToDirectory, network="To", virtual_network="5", ordered="false", no_vector="true"; + + enumeration(State, desc="DMA states", default="DMA_State_READY") { + READY, desc="Ready to accept a new request"; + BUSY_RD, desc="Busy: currently processing a request"; + BUSY_WR, desc="Busy: currently processing a request"; + } + + enumeration(Event, desc="DMA events") { + ReadRequest, desc="A new read request"; + WriteRequest, desc="A new write request"; + Data, desc="Data from a DMA memory read"; + Ack, desc="DMA write to memory completed"; + } + + external_type(DMASequencer) { + void ackCallback(); + void dataCallback(DataBlock); + } + + MessageBuffer mandatoryQueue, ordered="false", no_vector="true"; + DMASequencer dma_sequencer, factory='RubySystem::getDMASequencer(m_cfg["dma_sequencer"])', no_vector="true"; + State cur_state, no_vector="true"; + + State getState(Address addr) { + return cur_state; + } + void setState(Address addr, State state) { + cur_state := state; + } + + out_port(reqToDirectory_out, DMARequestMsg, reqToDirectory, desc="..."); + + in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, SequencerMsg) { + if (in_msg.Type == SequencerRequestType:LD ) { + trigger(Event:ReadRequest, in_msg.LineAddress); + } else if (in_msg.Type == SequencerRequestType:ST) { + trigger(Event:WriteRequest, in_msg.LineAddress); + } else { + error("Invalid request type"); + } + } + } + } + + in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") { + if (dmaResponseQueue_in.isReady()) { + peek( dmaResponseQueue_in, DMAResponseMsg) { + if (in_msg.Type == DMAResponseType:ACK) { + trigger(Event:Ack, in_msg.LineAddress); + } else if (in_msg.Type == DMAResponseType:DATA) { + trigger(Event:Data, in_msg.LineAddress); + } else { + error("Invalid response type"); + } + } + } + } + + action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:READ; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:WRITE; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.ackCallback(); + } + } + + action(d_dataCallback, "d", desc="Write data to dma sequencer") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.dataCallback(in_msg.DataBlk); + } + } + + action(p_popRequestQueue, "p", desc="Pop request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(p_popResponseQueue, "\p", desc="Pop request queue") { + dmaResponseQueue_in.dequeue(); + } + + transition(READY, ReadRequest, BUSY_RD) { + s_sendReadRequest; + p_popRequestQueue; + } + + transition(READY, WriteRequest, BUSY_WR) { + s_sendWriteRequest; + p_popRequestQueue; + } + + transition(BUSY_RD, Data, READY) { + d_dataCallback; + p_popResponseQueue; + } + + transition(BUSY_WR, Ack, READY) { + a_ackCallback; + p_popResponseQueue; + } +} diff --git a/src/mem/protocol/MOESI_CMP_token-msg.sm b/src/mem/protocol/MOESI_CMP_token-msg.sm index 2a75ce644..40c16b5e1 100644 --- a/src/mem/protocol/MOESI_CMP_token-msg.sm +++ b/src/mem/protocol/MOESI_CMP_token-msg.sm @@ -59,8 +59,10 @@ enumeration(CoherenceResponseType, desc="...") { // TriggerType enumeration(TriggerType, desc="...") { - REQUEST_TIMEOUT, desc="See corresponding event"; + REQUEST_TIMEOUT, desc="See corresponding event"; USE_TIMEOUT, desc="See corresponding event"; + DATA, desc="data for dma read response"; + DATA_ALL_TOKENS, desc="data and all tokens for dma write response"; } // TriggerMsg @@ -111,13 +113,45 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") { MessageSizeType MessageSize, desc="size category of the message"; } -GenericRequestType convertToGenericType(CoherenceRequestType type) { - if(type == CoherenceRequestType:GETS) { - return GenericRequestType:GETS; - } else if(type == CoherenceRequestType:GETX) { - return GenericRequestType:GETX; - } else { - DEBUG_EXPR(type); - error("invalid CoherenceRequestType"); - } +enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") { + READ, desc="Memory Read"; + WRITE, desc="Memory Write"; + NULL, desc="Invalid"; } + +enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") { + DATA, desc="DATA read"; + ACK, desc="ACK write"; + NULL, desc="Invalid"; +} + +structure(DMARequestMsg, desc="...", interface="NetworkMessage") { + DMARequestType Type, desc="Request type (read/write)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + MachineID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + int Len, desc="The length of the request"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +structure(DMAResponseMsg, desc="...", interface="NetworkMessage") { + DMAResponseType Type, desc="Response type (DATA/ACK)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +//GenericRequestType convertToGenericType(CoherenceRequestType type) { +// if(type == CoherenceRequestType:GETS) { +// return GenericRequestType:GETS; +// } else if(type == CoherenceRequestType:GETX) { +// return GenericRequestType:GETX; +// } else { +// DEBUG_EXPR(type); +// error("invalid CoherenceRequestType"); +// } +//} diff --git a/src/mem/protocol/MOESI_CMP_token.slicc b/src/mem/protocol/MOESI_CMP_token.slicc index ae4a6d6ec..a41226f90 100644 --- a/src/mem/protocol/MOESI_CMP_token.slicc +++ b/src/mem/protocol/MOESI_CMP_token.slicc @@ -2,4 +2,5 @@ MOESI_CMP_token-msg.sm MOESI_CMP_token-L1cache.sm MOESI_CMP_token-L2cache.sm MOESI_CMP_token-dir.sm +MOESI_CMP_token-dma.sm standard_CMP-protocol.sm diff --git a/src/mem/protocol/RubySlicc_Util.sm b/src/mem/protocol/RubySlicc_Util.sm index 312682bd7..e1771448f 100644 --- a/src/mem/protocol/RubySlicc_Util.sm +++ b/src/mem/protocol/RubySlicc_Util.sm @@ -52,7 +52,6 @@ void dirProfileCoherenceRequest(NodeID node, bool needCLB); bool isPerfectProtocol(); bool L1trainsPrefetcher(); int max_tokens(); -int N_tokens(); bool distributedPersistentEnabled(); Address setOffset(Address addr, int offset); Address makeLineAddress(Address addr); -- cgit v1.2.3