diff options
Diffstat (limited to 'src/mem')
-rw-r--r-- | src/mem/protocol/MESI_CMP_directory-L1cache.sm | 108 | ||||
-rw-r--r-- | src/mem/protocol/MESI_CMP_directory-L2cache.sm | 158 | ||||
-rw-r--r-- | src/mem/protocol/MESI_CMP_directory-mem.sm | 274 | ||||
-rw-r--r-- | src/mem/protocol/MESI_CMP_directory-msg.sm | 86 | ||||
-rw-r--r-- | src/mem/protocol/MI_example-dir.sm | 49 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_CMP_directory-L1cache.sm | 33 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_CMP_directory-L2cache.sm | 62 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_CMP_directory-dir.sm | 32 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_CMP_directory-dma.sm | 10 | ||||
-rw-r--r-- | src/mem/ruby/common/DataBlock.hh | 1 | ||||
-rw-r--r-- | src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb | 36 | ||||
-rw-r--r-- | src/mem/ruby/config/cfg.rb | 45 | ||||
-rw-r--r-- | src/mem/ruby/config/defaults.rb | 39 |
13 files changed, 698 insertions, 235 deletions
diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm index efdc58e1b..32669190f 100644 --- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm @@ -33,7 +33,14 @@ */ -machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATENCY_L1_RESPONSE_LATENCY LATENCY_TO_L2_LATENCY { +machine(L1Cache, "MSI Directory L1 Cache CMP") + : int l1_request_latency, + int l1_response_latency, + int to_l2_latency, + int l2_select_low_bit, + int l2_select_num_bits +{ + // NODE L1 CACHE // From this node's L1 cache TO the network @@ -120,7 +127,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE external_type(CacheMemory) { bool cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); @@ -139,9 +146,9 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE // CacheMemory L1IcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1I"', abstract_chip_ptr="true"; // CacheMemory L1DcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1D"', abstract_chip_ptr="true"; - CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["L1Icache"])'; + CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])'; - CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["L1Dcache"])'; + CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])'; // MessageBuffer mandatoryQueue, ordered="false", rank="100", abstract_chip_ptr="true"; @@ -178,10 +185,10 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE } State getState(Address addr) { - if((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == true){ - DEBUG_EXPR(id); - DEBUG_EXPR(addr); - } +// if((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == true){ +// DEBUG_EXPR(id); +// DEBUG_EXPR(addr); +// } assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false); if(L1_TBEs.isPresent(addr)) { @@ -343,13 +350,14 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE // ACTIONS action(a_issueGETS, "a", desc="Issue GETS") { peek(mandatoryQueue_in, CacheMsg) { - enqueue(requestIntraChipL1Network_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); DEBUG_EXPR(address); - DEBUG_EXPR(out_msg.Destination); + //DEBUG_EXPR(out_msg.Destination); out_msg.MessageSize := MessageSizeType:Control; out_msg.Prefetch := in_msg.Prefetch; out_msg.AccessMode := in_msg.AccessMode; @@ -359,13 +367,14 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE action(ai_issueGETINSTR, "ai", desc="Issue GETINSTR") { peek(mandatoryQueue_in, CacheMsg) { - enqueue(requestIntraChipL1Network_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GET_INSTR; out_msg.Requestor := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); DEBUG_EXPR(address); - DEBUG_EXPR(out_msg.Destination); + //DEBUG_EXPR(out_msg.Destination); out_msg.MessageSize := MessageSizeType:Control; out_msg.Prefetch := in_msg.Prefetch; out_msg.AccessMode := in_msg.AccessMode; @@ -376,14 +385,15 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE action(b_issueGETX, "b", desc="Issue GETX") { peek(mandatoryQueue_in, CacheMsg) { - enqueue(requestIntraChipL1Network_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; - DEBUG_EXPR(machineID); - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + //DEBUG_EXPR(machineID); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); DEBUG_EXPR(address); - DEBUG_EXPR(out_msg.Destination); + //DEBUG_EXPR(out_msg.Destination); out_msg.MessageSize := MessageSizeType:Control; out_msg.Prefetch := in_msg.Prefetch; out_msg.AccessMode := in_msg.AccessMode; @@ -393,13 +403,14 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE action(c_issueUPGRADE, "c", desc="Issue GETX") { peek(mandatoryQueue_in, CacheMsg) { - enqueue(requestIntraChipL1Network_out, RequestMsg, latency="L1_REQUEST_LATENCY") { + enqueue(requestIntraChipL1Network_out, RequestMsg, latency= l1_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:UPGRADE; out_msg.Requestor := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); DEBUG_EXPR(address); - DEBUG_EXPR(out_msg.Destination); + //DEBUG_EXPR(out_msg.Destination); out_msg.MessageSize := MessageSizeType:Control; out_msg.Prefetch := in_msg.Prefetch; out_msg.AccessMode := in_msg.AccessMode; @@ -409,7 +420,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE action(d_sendDataToRequestor, "d", desc="send data to requestor") { peek(requestIntraChipL1Network_in, RequestMsg) { - enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.DataBlk := getL1CacheEntry(address).DataBlk; @@ -422,20 +433,21 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE } action(d2_sendDataToL2, "d2", desc="send data to the L2 cache because of M downgrade") { - enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.DataBlk := getL1CacheEntry(address).DataBlk; out_msg.Dirty := getL1CacheEntry(address).Dirty; out_msg.Sender := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Data; } } action(dt_sendDataToRequestor_fromTBE, "dt", desc="send data to requestor") { peek(requestIntraChipL1Network_in, RequestMsg) { - enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.DataBlk := L1_TBEs[address].DataBlk; @@ -448,20 +460,21 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE } action(d2t_sendDataToL2_fromTBE, "d2t", desc="send data to the L2 cache") { - enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.DataBlk := L1_TBEs[address].DataBlk; out_msg.Dirty := L1_TBEs[address].Dirty; out_msg.Sender := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Data; } } action(e_sendAckToRequestor, "e", desc="send invalidate ack to requestor (could be L2 or L1)") { peek(requestIntraChipL1Network_in, RequestMsg) { - enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -472,32 +485,34 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE } action(f_sendDataToL2, "f", desc="send data to the L2 cache") { - enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.DataBlk := getL1CacheEntry(address).DataBlk; out_msg.Dirty := getL1CacheEntry(address).Dirty; out_msg.Sender := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Data; } } action(ft_sendDataToL2_fromTBE, "ft", desc="send data to the L2 cache") { - enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.DataBlk := L1_TBEs[address].DataBlk; out_msg.Dirty := L1_TBEs[address].Dirty; out_msg.Sender := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Data; } } action(fi_sendInvAck, "fi", desc="send data to the L2 cache") { peek(requestIntraChipL1Network_in, RequestMsg) { - enqueue(responseIntraChipL1Network_out, ResponseMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -510,13 +525,14 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE action(g_issuePUTX, "g", desc="send data to the L2 cache") { - enqueue(requestIntraChipL1Network_out, RequestMsg, latency="L1_RESPONSE_LATENCY") { + enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:PUTX; out_msg.DataBlk := getL1CacheEntry(address).DataBlk; out_msg.Dirty := getL1CacheEntry(address).Dirty; out_msg.Requestor:= machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); if (getL1CacheEntry(address).Dirty) { out_msg.MessageSize := MessageSizeType:Writeback_Data; } else { @@ -526,34 +542,40 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE } action(j_sendUnblock, "j", desc="send unblock to the L2 cache") { - enqueue(unblockNetwork_out, ResponseMsg, latency="TO_L2_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, latency=to_l2_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:UNBLOCK; out_msg.Sender := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; + DEBUG_EXPR(address); + } } action(jj_sendExclusiveUnblock, "\j", desc="send unblock to the L2 cache") { - enqueue(unblockNetwork_out, ResponseMsg, latency="TO_L2_LATENCY") { + enqueue(unblockNetwork_out, ResponseMsg, latency=to_l2_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:EXCLUSIVE_UNBLOCK; out_msg.Sender := machineID; - out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; + DEBUG_EXPR(address); + } } action(h_load_hit, "h", desc="If not prefetch, notify sequencer the load completed.") { - DEBUG_EXPR(getL1CacheEntry(address).DataBlk); + //DEBUG_EXPR(getL1CacheEntry(address).DataBlk); sequencer.readCallback(address, getL1CacheEntry(address).DataBlk); } action(hh_store_hit, "\h", desc="If not prefetch, notify sequencer that store completed.") { - DEBUG_EXPR(getL1CacheEntry(address).DataBlk); + //DEBUG_EXPR(getL1CacheEntry(address).DataBlk); sequencer.writeCallback(address, getL1CacheEntry(address).DataBlk); getL1CacheEntry(address).Dirty := true; } @@ -611,13 +633,13 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") : LATENCY_L1_REQUEST_LATENCY LATE action(oo_allocateL1DCacheBlock, "\o", desc="Set L1 D-cache tag equal to tag of block B.") { if (L1DcacheMemory.isTagPresent(address) == false) { - L1DcacheMemory.allocate(address); + L1DcacheMemory.allocate(address, new Entry); } } action(pp_allocateL1ICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") { if (L1IcacheMemory.isTagPresent(address) == false) { - L1IcacheMemory.allocate(address); + L1IcacheMemory.allocate(address, new Entry); } } diff --git a/src/mem/protocol/MESI_CMP_directory-L2cache.sm b/src/mem/protocol/MESI_CMP_directory-L2cache.sm index 2bd9b3ce7..6439e4fb3 100644 --- a/src/mem/protocol/MESI_CMP_directory-L2cache.sm +++ b/src/mem/protocol/MESI_CMP_directory-L2cache.sm @@ -32,7 +32,11 @@ * */ -machine(L2Cache, "MOSI Directory L2 Cache CMP") { +machine(L2Cache, "MESI Directory L2 Cache CMP") + : int l2_request_latency, + int l2_response_latency, + int to_l1_latency +{ // L2 BANK QUEUES // From local bank of L2 cache TO the network @@ -41,9 +45,10 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { MessageBuffer responseFromL2Cache, network="To", virtual_network="3", ordered="false"; // this L2 bank -> a local L1 || Memory // FROM the network to this local bank of L2 cache + MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false"; // a local L1 || Memory -> this L2 bank MessageBuffer L1RequestToL2Cache, network="From", virtual_network="0", ordered="false"; // a local L1 -> this L2 bank MessageBuffer responseToL2Cache, network="From", virtual_network="3", ordered="false"; // a local L1 || Memory -> this L2 bank - MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false"; // a local L1 || Memory -> this L2 bank +// MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false"; // a local L1 || Memory -> this L2 bank // STATES enumeration(State, desc="L2 Cache states", default="L2Cache_State_NP") { @@ -73,7 +78,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { MT_IIB, desc="Blocked for L1_GETS from MT, waiting for unblock and data"; MT_IB, desc="Blocked for L1_GETS from MT, got unblock, waiting for data"; MT_SB, desc="Blocked for L1_GETS from MT, got data, waiting for unblock"; - + } // EVENTS @@ -111,6 +116,8 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { Unblock_Cancel, desc="Unblock from L1 requestor (FOR XACT MEMORY)"; Exclusive_Unblock, desc="Unblock from L1 requestor"; + MEM_Inv, desc="Invalidation from directory"; + } // TYPES @@ -141,7 +148,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { external_type(CacheMemory) { bool cacheAvail(Address); Address cacheProbe(Address); - void allocate(Address); + void allocate(Address, Entry); void deallocate(Address); Entry lookup(Address); void changePermission(Address, AccessPermission); @@ -156,12 +163,12 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { bool isPresent(Address); } - TBETable L2_TBEs, template_hack="<L2Cache_TBE>", no_vector="true"; + TBETable L2_TBEs, template_hack="<L2Cache_TBE>"; // CacheMemory L2cacheMemory, template_hack="<L2Cache_Entry>", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,MachineType_L2Cache,int_to_string(i)'; - CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])', no_vector="true"; + CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])'; // inclusive cache, returns L2 entries only Entry getL2CacheEntry(Address addr), return_by_ref="yes" { @@ -196,10 +203,9 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { } void addSharer(Address addr, MachineID requestor) { - DEBUG_EXPR(machineID); - DEBUG_EXPR(requestor); - DEBUG_EXPR(addr); - assert(map_L1CacheMachId_to_L2Cache(addr, requestor) == machineID); + //DEBUG_EXPR(machineID); + //DEBUG_EXPR(requestor); + //DEBUG_EXPR(addr); L2cacheMemory[addr].Sharers.add(requestor); } @@ -273,6 +279,29 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { out_port(responseIntraChipL2Network_out, ResponseMsg, responseFromL2Cache); + in_port(L1unblockNetwork_in, ResponseMsg, unblockToL2Cache) { + if(L1unblockNetwork_in.isReady()) { + peek(L1unblockNetwork_in, ResponseMsg) { + DEBUG_EXPR(in_msg.Address); + DEBUG_EXPR(getState(in_msg.Address)); + DEBUG_EXPR(in_msg.Sender); + DEBUG_EXPR(in_msg.Type); + DEBUG_EXPR(in_msg.Destination); + + assert(in_msg.Destination.isElement(machineID)); + if (in_msg.Type == CoherenceResponseType:EXCLUSIVE_UNBLOCK) { + trigger(Event:Exclusive_Unblock, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:UNBLOCK) { + trigger(Event:Unblock, in_msg.Address); + } else { + error("unknown unblock message"); + } + } + } + } + + + // Response IntraChip L2 Network - response msg to this particular L2 bank in_port(responseIntraChipL2Network_in, ResponseMsg, responseToL2Cache) { if (responseIntraChipL2Network_in.isReady()) { @@ -301,6 +330,8 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { trigger(Event:Mem_Data, in_msg.Address); // L2 now has data and all off-chip acks } else if(in_msg.Type == CoherenceResponseType:MEMORY_ACK) { trigger(Event:Mem_Ack, in_msg.Address); // L2 now has data and all off-chip acks + } else if(in_msg.Type == CoherenceResponseType:INV) { + trigger(Event:MEM_Inv, in_msg.Address); // L2 now has data and all off-chip acks } else { error("unknown message type"); } @@ -314,11 +345,11 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { if(L1RequestIntraChipL2Network_in.isReady()) { peek(L1RequestIntraChipL2Network_in, RequestMsg) { DEBUG_EXPR(in_msg.Address); - DEBUG_EXPR(id); + //DEBUG_EXPR(id); DEBUG_EXPR(getState(in_msg.Address)); - DEBUG_EXPR(in_msg.Requestor); + //DEBUG_EXPR(in_msg.Requestor); DEBUG_EXPR(in_msg.Type); - DEBUG_EXPR(in_msg.Destination); + //DEBUG_EXPR(in_msg.Destination); assert(machineIDToMachineType(in_msg.Requestor) == MachineType:L1Cache); assert(in_msg.Destination.isElement(machineID)); if (L2cacheMemory.isTagPresent(in_msg.Address)) { @@ -341,26 +372,12 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { } } - in_port(L1unblockNetwork_in, ResponseMsg, unblockToL2Cache) { - if(L1unblockNetwork_in.isReady()) { - peek(L1unblockNetwork_in, ResponseMsg) { - assert(in_msg.Destination.isElement(machineID)); - if (in_msg.Type == CoherenceResponseType:EXCLUSIVE_UNBLOCK) { - trigger(Event:Exclusive_Unblock, in_msg.Address); - } else if (in_msg.Type == CoherenceResponseType:UNBLOCK) { - trigger(Event:Unblock, in_msg.Address); - } else { - error("unknown unblock message"); - } - } - } - } // ACTIONS action(a_issueFetchToMemory, "a", desc="fetch data from memory") { peek(L1RequestIntraChipL2Network_in, RequestMsg) { - enqueue(DirRequestIntraChipL2Network_out, RequestMsg, latency="L2_REQUEST_LATENCY") { + enqueue(DirRequestIntraChipL2Network_out, RequestMsg, latency=l2_request_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; @@ -372,7 +389,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(b_forwardRequestToExclusive, "b", desc="Forward request to the exclusive L1") { peek(L1RequestIntraChipL2Network_in, RequestMsg) { - enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency="1") { + enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency=to_l1_latency) { out_msg.Address := address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; @@ -383,7 +400,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { } action(c_exclusiveReplacement, "c", desc="Send data to memory") { - enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:MEMORY_DATA; out_msg.Sender := machineID; @@ -394,8 +411,19 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { } } + action(c_exclusiveCleanReplacement, "cc", desc="Send ack to memory for clean replacement") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:ACK; + out_msg.Sender := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(ct_exclusiveReplacementFromTBE, "ct", desc="Send data to memory") { - enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:MEMORY_DATA; out_msg.Sender := machineID; @@ -409,7 +437,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(d_sendDataToRequestor, "d", desc="Send data from cache to reqeustor") { peek(L1RequestIntraChipL2Network_in, RequestMsg) { - enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; @@ -428,7 +456,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(dd_sendExclusiveDataToRequestor, "dd", desc="Send data from cache to reqeustor") { peek(L1RequestIntraChipL2Network_in, RequestMsg) { - enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; out_msg.Sender := machineID; @@ -447,7 +475,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(ds_sendSharedDataToRequestor, "ds", desc="Send data from cache to reqeustor") { peek(L1RequestIntraChipL2Network_in, RequestMsg) { - enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=l2_response_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; @@ -462,7 +490,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(e_sendDataToGetSRequestors, "e", desc="Send data from cache to all GetS IDs") { assert(L2_TBEs[address].L1_GetS_IDs.count() > 0); - enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="1") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=to_l1_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; @@ -475,7 +503,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(ex_sendExclusiveDataToGetSRequestors, "ex", desc="Send data from cache to all GetS IDs") { assert(L2_TBEs[address].L1_GetS_IDs.count() == 1); - enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="1") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=to_l1_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; out_msg.Sender := machineID; @@ -488,24 +516,24 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(ee_sendDataToGetXRequestor, "ee", desc="Send data from cache to GetX ID") { - enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="1") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=to_l1_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; out_msg.Destination.add(L2_TBEs[address].L1_GetX_ID); - DEBUG_EXPR(out_msg.Destination); + //DEBUG_EXPR(out_msg.Destination); out_msg.DataBlk := getL2CacheEntry(address).DataBlk; out_msg.Dirty := getL2CacheEntry(address).Dirty; DEBUG_EXPR(out_msg.Address); - DEBUG_EXPR(out_msg.Destination); - DEBUG_EXPR(out_msg.DataBlk); + //DEBUG_EXPR(out_msg.Destination); + //DEBUG_EXPR(out_msg.DataBlk); out_msg.MessageSize := MessageSizeType:Response_Data; } } action(f_sendInvToSharers, "f", desc="invalidate sharers for L2 replacement") { - enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency="1") { + enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency=to_l1_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:INV; out_msg.Requestor := machineID; @@ -516,7 +544,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(fw_sendFwdInvToSharers, "fw", desc="invalidate sharers for request") { peek(L1RequestIntraChipL2Network_in, RequestMsg) { - enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency="1") { + enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency=to_l1_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:INV; out_msg.Requestor := in_msg.Requestor; @@ -529,7 +557,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(fwm_sendFwdInvToSharersMinusRequestor, "fwm", desc="invalidate sharers for request, requestor is sharer") { peek(L1RequestIntraChipL2Network_in, RequestMsg) { - enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency="1") { + enqueue(L1RequestIntraChipL2Network_out, RequestMsg, latency=to_l1_latency) { out_msg.Address := address; out_msg.Type := CoherenceRequestType:INV; out_msg.Requestor := in_msg.Requestor; @@ -621,7 +649,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(qq_allocateL2CacheBlock, "\q", desc="Set L2 cache tag equal to tag of block B.") { if (L2cacheMemory.isTagPresent(address) == false) { - L2cacheMemory.allocate(address); + L2cacheMemory.allocate(address, new Entry); } } @@ -631,7 +659,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(t_sendWBAck, "t", desc="Send writeback ACK") { peek(L1RequestIntraChipL2Network_in, RequestMsg) { - enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="1") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=to_l1_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:WB_ACK; out_msg.Sender := machineID; @@ -643,7 +671,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { action(ts_sendInvAckToUpgrader, "ts", desc="Send ACK to upgrader") { peek(L1RequestIntraChipL2Network_in, RequestMsg) { - enqueue(responseIntraChipL2Network_out, ResponseMsg, latency="1") { + enqueue(responseIntraChipL2Network_out, ResponseMsg, latency=to_l1_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; @@ -715,6 +743,11 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { L1RequestIntraChipL2Network_in.recycle(); } + action(zn_recycleResponseNetwork, "zn", desc="recycle memory request") { + responseIntraChipL2Network_in.recycle(); + } + + //***************************************************** // TRANSITIONS //***************************************************** @@ -736,6 +769,15 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { zz_recycleL1RequestQueue; } + transition({IM, IS, ISS, SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv) { + zn_recycleResponseNetwork; + } + + transition({S_I, M_I, MT_I}, MEM_Inv) { + o_popIncomingResponseQueue; + } + + transition({SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS, L1_GET_INSTR, L1_GETX, L1_UPGRADE}) { zz_recycleL1RequestQueue; } @@ -846,12 +888,13 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { rr_deallocateL2CacheBlock; } - transition(SS, L2_Replacement, S_I) { + transition(SS, {L2_Replacement, MEM_Inv}, S_I) { i_allocateTBE; f_sendInvToSharers; rr_deallocateL2CacheBlock; } + transition(M, L1_GETX, MT_MB) { d_sendDataToRequestor; uu_profileMiss; @@ -874,13 +917,15 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { jj_popL1RequestQueue; } - transition(M, L2_Replacement, M_I) { + transition(M, {L2_Replacement, MEM_Inv}, M_I) { i_allocateTBE; c_exclusiveReplacement; rr_deallocateL2CacheBlock; } transition(M, L2_Replacement_clean, M_I) { + i_allocateTBE; + c_exclusiveCleanReplacement; rr_deallocateL2CacheBlock; } @@ -902,7 +947,7 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { jj_popL1RequestQueue; } - transition(MT, L2_Replacement, MT_I) { + transition(MT, {L2_Replacement, MEM_Inv}, MT_I) { i_allocateTBE; f_sendInvToSharers; rr_deallocateL2CacheBlock; @@ -977,8 +1022,8 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { o_popIncomingResponseQueue; } - transition(I_I, Ack_all, NP) { - s_deallocateTBE; + transition(I_I, Ack_all, M_I) { + c_exclusiveCleanReplacement; o_popIncomingResponseQueue; } @@ -988,8 +1033,8 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { o_popIncomingResponseQueue; } - transition(MCT_I, WB_Data_clean, NP) { - s_deallocateTBE; + transition(MCT_I, {WB_Data_clean, Ack_all}, M_I) { + c_exclusiveCleanReplacement; o_popIncomingResponseQueue; } @@ -999,11 +1044,6 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { o_popIncomingResponseQueue; } - // clean data that L1 exclusive never wrote - transition(MCT_I, Ack_all, NP) { - s_deallocateTBE; - o_popIncomingResponseQueue; - } // drop this because L1 will send data again // the reason we don't accept is that the request virtual network may be completely backed up @@ -1037,3 +1077,5 @@ machine(L2Cache, "MOSI Directory L2 Cache CMP") { } } + + diff --git a/src/mem/protocol/MESI_CMP_directory-mem.sm b/src/mem/protocol/MESI_CMP_directory-mem.sm index 84768c333..f5a2e431b 100644 --- a/src/mem/protocol/MESI_CMP_directory-mem.sm +++ b/src/mem/protocol/MESI_CMP_directory-mem.sm @@ -35,14 +35,17 @@ // Copied here by aep 12/14/07 -machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATENCY LATENCY_TO_MEM_CTRL_LATENCY { +machine(Directory, "MESI_CMP_filter_directory protocol") + : int to_mem_ctrl_latency, + int directory_latency +{ MessageBuffer requestToDir, network="From", virtual_network="2", ordered="false"; MessageBuffer responseToDir, network="From", virtual_network="3", ordered="false"; MessageBuffer responseFromDir, network="To", virtual_network="3", ordered="false"; - MessageBuffer dmaRequestFromDir, network="To", virtual_network="4", ordered="true", no_vector="true"; - MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true", no_vector="true"; + MessageBuffer dmaRequestFromDir, network="To", virtual_network="6", ordered="true"; + MessageBuffer dmaRequestToDir, network="From", virtual_network="7", ordered="true"; // STATES @@ -50,7 +53,15 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN // Base states I, desc="Owner"; ID, desc="Intermediate state for DMA_READ when in I"; - ID_W, desc="Intermediate state for DMA_WRITE when in I"; + ID_W, desc="Intermediate state for DMA_WRITE when in I"; + + M, desc="Modified"; + IM, desc="Intermediate State I>M"; + MI, desc="Intermediate State M>I"; + M_DRD, desc="Intermediate State when there is a dma read"; + M_DRDI, desc="Intermediate State when there is a dma read"; + M_DWR, desc="Intermediate State when there is a dma write"; + M_DWRI, desc="Intermediate State when there is a dma write"; } // Events @@ -62,7 +73,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN //added by SS for dma DMA_READ, desc="A DMA Read memory request"; DMA_WRITE, desc="A DMA Write memory request"; - + CleanReplacement, desc="Clean Replacement in L2 cache"; } @@ -70,7 +81,10 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN // DirectoryEntry structure(Entry, desc="...") { + State DirectoryState, desc="Directory state"; DataBlock DataBlk, desc="data for the block"; + NetDest Sharers, desc="Sharers for this block"; + NetDest Owner, desc="Owner of this block"; } external_type(DirectoryMemory) { @@ -83,6 +97,21 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN } + // TBE entries for DMA requests + structure(TBE, desc="TBE entries for outstanding DMA requests") { + Address PhysicalAddress, desc="physical address"; + State TBEState, desc="Transient State"; + DataBlock DataBlk, desc="Data to be written (DMA write only)"; + int Len, desc="..."; + } + + external_type(TBETable) { + TBE lookup(Address); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } + // ** OBJECTS ** @@ -94,13 +123,40 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])'; + TBETable TBEs, template_hack="<Directory_TBE>"; + State getState(Address addr) { - return State:I; - } + if (TBEs.isPresent(addr)) { + return TBEs[addr].TBEState; + } else if (directory.isPresent(addr)) { + return directory[addr].DirectoryState; + } else { + return State:I; + } + } + void setState(Address addr, State state) { + + if (TBEs.isPresent(addr)) { + TBEs[addr].TBEState := state; + } + + if (directory.isPresent(addr)) { + + if (state == State:I) { + assert(directory[addr].Owner.count() == 0); + assert(directory[addr].Sharers.count() == 0); + } else if (state == State:M) { + assert(directory[addr].Owner.count() == 1); + assert(directory[addr].Sharers.count() == 0); + } + + directory[addr].DirectoryState := state; + } } + bool isGETRequest(CoherenceRequestType type) { return (type == CoherenceRequestType:GETS) || (type == CoherenceRequestType:GET_INSTR) || @@ -120,9 +176,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN if (dmaRequestQueue_in.isReady()) { peek(dmaRequestQueue_in, DMARequestMsg) { if (in_msg.Type == DMARequestType:READ) { - trigger(Event:DMA_READ, in_msg.PhysicalAddress); + trigger(Event:DMA_READ, in_msg.LineAddress); } else if (in_msg.Type == DMARequestType:WRITE) { - trigger(Event:DMA_WRITE, in_msg.PhysicalAddress); + trigger(Event:DMA_WRITE, in_msg.LineAddress); } else { error("Invalid message"); } @@ -151,6 +207,8 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN assert(in_msg.Destination.isElement(machineID)); if (in_msg.Type == CoherenceResponseType:MEMORY_DATA) { trigger(Event:Data, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:CleanReplacement, in_msg.Address); } else { DEBUG_EXPR(in_msg.Type); error("Invalid message"); @@ -179,12 +237,12 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN // Actions action(a_sendAck, "a", desc="Send ack to L2") { - peek(memQueue_in, MemoryMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="TO_MEM_CTRL_LATENCY") { + peek(responseNetwork_in, ResponseMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:MEMORY_ACK; out_msg.Sender := machineID; - out_msg.Destination.add(in_msg.OriginalRequestorMachId); + out_msg.Destination.add(in_msg.Sender); out_msg.MessageSize := MessageSizeType:Response_Control; } } @@ -192,7 +250,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN action(d_sendData, "d", desc="Send data to requestor") { peek(memQueue_in, MemoryMsg) { - enqueue(responseNetwork_out, ResponseMsg, latency="TO_MEM_CTRL_LATENCY") { + enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) { out_msg.Address := address; out_msg.Type := CoherenceResponseType:MEMORY_DATA; out_msg.Sender := machineID; @@ -204,6 +262,19 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN } } + // Actions + action(aa_sendAck, "aa", desc="Send ack to L2") { + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:MEMORY_ACK; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.OriginalRequestorMachId); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + action(j_popIncomingRequestQueue, "j", desc="Pop incoming request queue") { requestNetwork_in.dequeue(); } @@ -218,7 +289,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { peek(requestNetwork_in, RequestMsg) { - enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") { + enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) { out_msg.Address := address; out_msg.Type := MemoryRequestType:MEMORY_READ; out_msg.Sender := machineID; @@ -234,7 +305,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN action(qw_queueMemoryWBRequest, "qw", desc="Queue off-chip writeback request") { peek(responseNetwork_in, ResponseMsg) { - enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") { + enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) { out_msg.Address := address; out_msg.Type := MemoryRequestType:MEMORY_WB; out_msg.Sender := machineID; @@ -258,7 +329,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN //added by SS for dma action(qf_queueMemoryFetchRequestDMA, "qfd", desc="Queue off-chip fetch request") { peek(dmaRequestQueue_in, DMARequestMsg) { - enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") { + enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) { out_msg.Address := address; out_msg.Type := MemoryRequestType:MEMORY_READ; out_msg.Sender := machineID; @@ -276,7 +347,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN action(dr_sendDMAData, "dr", desc="Send Data to DMA controller from directory") { peek(memQueue_in, MemoryMsg) { - enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="MEMORY_LATENCY") { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) { out_msg.PhysicalAddress := address; out_msg.Type := DMAResponseType:DATA; out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be @@ -288,18 +359,22 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN action(dw_writeDMAData, "dw", desc="DMA Write data to memory") { peek(dmaRequestQueue_in, DMARequestMsg) { - directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, in_msg.Offset, in_msg.Len); + //directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, in_msg.Offset, in_msg.Len); + + directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.PhysicalAddress), in_msg.Len); } } action(qw_queueMemoryWBRequest_partial, "qwp", desc="Queue off-chip writeback request") { peek(dmaRequestQueue_in, DMARequestMsg) { - enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") { + enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) { out_msg.Address := address; out_msg.Type := MemoryRequestType:MEMORY_WB; out_msg.OriginalRequestorMachId := machineID; //out_msg.DataBlk := in_msg.DataBlk; - out_msg.DataBlk.copyPartial(in_msg.DataBlk, in_msg.Offset, in_msg.Len); + out_msg.DataBlk.copyPartial(in_msg.DataBlk, addressOffset(address), in_msg.Len); + + out_msg.MessageSize := in_msg.MessageSize; //out_msg.Prefetch := in_msg.Prefetch; @@ -309,7 +384,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN } action(da_sendDMAAck, "da", desc="Send Ack to DMA controller") { - enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="MEMORY_LATENCY") { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) { out_msg.PhysicalAddress := address; out_msg.Type := DMAResponseType:ACK; out_msg.Destination.add(map_Address_to_DMA(address)); @@ -318,33 +393,123 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN } action(z_recycleRequestQueue, "z", desc="recycle request queue") { - requestNetwork_in.dequeue(); + requestNetwork_in.recycle(); + } + + action(zz_recycleDMAQueue, "zz", desc="recycle DMA queue") { + dmaRequestQueue_in.recycle(); + } + + + action(e_ownerIsRequestor, "e", desc="The owner is now the requestor") { + peek(requestNetwork_in, RequestMsg) { + directory[address].Owner.clear(); + directory[address].Owner.add(in_msg.Requestor); + } } + + action(inv_sendCacheInvalidate, "inv", desc="Invalidate a cache block") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:INV; + out_msg.Sender := machineID; + out_msg.Destination := directory[in_msg.PhysicalAddress].Owner; + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + + + action(drp_sendDMAData, "drp", desc="Send Data to DMA controller from incoming PUTX") { + peek(responseNetwork_in, ResponseMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) { + out_msg.PhysicalAddress := address; + out_msg.Type := DMAResponseType:DATA; + out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be + out_msg.Destination.add(map_Address_to_DMA(address)); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(c_clearOwner, "c", desc="Clear the owner field") { + directory[address].Owner.clear(); + } + + action(v_allocateTBE, "v", desc="Allocate TBE") { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + TBEs[address].DataBlk := in_msg.DataBlk; + TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; + TBEs[address].Len := in_msg.Len; + } + } + + action(dwt_writeDMADataFromTBE, "dwt", desc="DMA Write data to memory from TBE") { + //directory[address].DataBlk.copyPartial(TBEs[address].DataBlk, TBEs[address].Offset, TBEs[address].Len); + directory[address].DataBlk.copyPartial(TBEs[address].DataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + + + } + + + action(qw_queueMemoryWBRequest_partialTBE, "qwt", desc="Queue off-chip writeback request") { + peek(responseNetwork_in, ResponseMsg) { + enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + out_msg.OriginalRequestorMachId := in_msg.Sender; + //out_msg.DataBlk := in_msg.DataBlk; + //out_msg.DataBlk.copyPartial(TBEs[address].DataBlk, TBEs[address].Offset, TBEs[address].Len); + out_msg.DataBlk.copyPartial(TBEs[address].DataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + + out_msg.MessageSize := in_msg.MessageSize; + //out_msg.Prefetch := in_msg.Prefetch; + + DEBUG_EXPR(out_msg); + } + } + } + + action(w_deallocateTBE, "w", desc="Deallocate TBE") { + TBEs.deallocate(address); + } + + // TRANSITIONS - transition(I, Fetch) { - //d_sendData; + + transition(I, Fetch, IM) { qf_queueMemoryFetchRequest; + e_ownerIsRequestor; j_popIncomingRequestQueue; } - transition(I, Data) { + transition(IM, Memory_Data, M) { + d_sendData; + l_popMemQueue; + } +//added by SS + transition(M, CleanReplacement, I) { + c_clearOwner; + a_sendAck; + k_popIncomingResponseQueue; + } + + transition(M, Data, MI) { m_writeDataToMemory; - //a_sendAck; qw_queueMemoryWBRequest; k_popIncomingResponseQueue; } - transition(I, Memory_Data) { - d_sendData; + transition(MI, Memory_Ack, I) { + c_clearOwner; + aa_sendAck; l_popMemQueue; } - transition(I, Memory_Ack) { - a_sendAck; - l_popMemQueue; - } //added by SS for dma support transition(I, DMA_READ, ID) { @@ -368,9 +533,52 @@ machine(Directory, "MESI_CMP_filter_directory protocol") : LATENCY_MEMORY_LATEN l_popMemQueue; } - transition({ID, ID_W}, {Fetch, Data} ) { + transition({ID, ID_W, M_DRDI, M_DWRI, IM, MI}, {Fetch, Data} ) { z_recycleRequestQueue; } + transition({ID, ID_W, M_DRD, M_DRDI, M_DWR, M_DWRI, IM, MI}, {DMA_WRITE, DMA_READ} ) { + zz_recycleDMAQueue; + } + + + transition(M, DMA_READ, M_DRD) { + inv_sendCacheInvalidate; + p_popIncomingDMARequestQueue; + } + + transition(M_DRD, Data, M_DRDI) { + drp_sendDMAData; + m_writeDataToMemory; + qw_queueMemoryWBRequest; + k_popIncomingResponseQueue; + } + + transition(M_DRDI, Memory_Ack, I) { + aa_sendAck; + c_clearOwner; + l_popMemQueue; + } + + transition(M, DMA_WRITE, M_DWR) { + v_allocateTBE; + inv_sendCacheInvalidate; + p_popIncomingDMARequestQueue; + } + + transition(M_DWR, Data, M_DWRI) { + m_writeDataToMemory; + qw_queueMemoryWBRequest_partialTBE; + k_popIncomingResponseQueue; + } + + transition(M_DWRI, Memory_Ack, I) { + dwt_writeDMADataFromTBE; + aa_sendAck; + c_clearOwner; + da_sendDMAAck; + w_deallocateTBE; + l_popMemQueue; + } } diff --git a/src/mem/protocol/MESI_CMP_directory-msg.sm b/src/mem/protocol/MESI_CMP_directory-msg.sm index e726b062c..15934e6b2 100644 --- a/src/mem/protocol/MESI_CMP_directory-msg.sm +++ b/src/mem/protocol/MESI_CMP_directory-msg.sm @@ -1,32 +1,57 @@ /* - * Copyright (c) 1999-2005 Mark D. Hill and David A. Wood - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ + Copyright (C) 1999-2005 by Mark D. Hill and David A. Wood for the + Wisconsin Multifacet Project. Contact: gems@cs.wisc.edu + http://www.cs.wisc.edu/gems/ + + -------------------------------------------------------------------- + + This file is part of the SLICC (Specification Language for + Implementing Cache Coherence), a component of the Multifacet GEMS + (General Execution-driven Multiprocessor Simulator) software + toolset originally developed at the University of Wisconsin-Madison. + + SLICC was originally developed by Milo Martin with substantial + contributions from Daniel Sorin. + + Substantial further development of Multifacet GEMS at the + University of Wisconsin was performed by Alaa Alameldeen, Brad + Beckmann, Jayaram Bobba, Ross Dickson, Dan Gibson, Pacia Harper, + Derek Hower, Milo Martin, Michael Marty, Carl Mauer, Michelle Moravan, + Kevin Moore, Manoj Plakal, Daniel Sorin, Haris Volos, Min Xu, and Luke Yen. + + -------------------------------------------------------------------- + + If your use of this software contributes to a published paper, we + request that you (1) cite our summary paper that appears on our + website (http://www.cs.wisc.edu/gems/) and (2) e-mail a citation + for your published paper to gems@cs.wisc.edu. + + If you redistribute derivatives of this software, we request that + you notify us and either (1) ask people to register with us at our + website (http://www.cs.wisc.edu/gems/) or (2) collect registration + information and periodically send it to us. + -------------------------------------------------------------------- + + Multifacet GEMS is free software; you can redistribute it and/or + modify it under the terms of version 2 of the GNU General Public + License as published by the Free Software Foundation. + + Multifacet GEMS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the Multifacet GEMS; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA + + The GNU General Public License is contained in the file LICENSE. + +### END HEADER ### +*/ /* * $Id: MSI_MOSI_CMP_directory-msg.sm 1.5 05/01/19 15:48:37-06:00 mikem@royal16.cs.wisc.edu $ * @@ -40,6 +65,12 @@ enumeration(CoherenceRequestType, desc="...") { GET_INSTR, desc="Get Instruction"; INV, desc="INValidate"; PUTX, desc="replacement message"; + + WB_ACK, desc="Writeback ack"; + WB_NACK, desc="Writeback neg. ack"; + FWD, desc="Generic FWD"; + + } // CoherenceResponseType @@ -52,6 +83,7 @@ enumeration(CoherenceResponseType, desc="...") { WB_ACK, desc="writeback ack"; UNBLOCK, desc="unblock"; EXCLUSIVE_UNBLOCK, desc="exclusive unblock"; + INV, desc="Invalidate from directory"; } // RequestMsg @@ -94,6 +126,7 @@ enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") { structure(DMARequestMsg, desc="...", interface="NetworkMessage") { DMARequestType Type, desc="Request type (read/write)"; Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; NetDest Destination, desc="Destination"; DataBlock DataBlk, desc="DataBlk attached to this request"; int Offset, desc="The offset into the datablock"; @@ -104,6 +137,7 @@ structure(DMARequestMsg, desc="...", interface="NetworkMessage") { structure(DMAResponseMsg, desc="...", interface="NetworkMessage") { DMAResponseType Type, desc="Response type (DATA/ACK)"; Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; NetDest Destination, desc="Destination"; DataBlock DataBlk, desc="DataBlk attached to this request"; MessageSizeType MessageSize, desc="size category of the message"; diff --git a/src/mem/protocol/MI_example-dir.sm b/src/mem/protocol/MI_example-dir.sm index 9502a40b4..c045419b6 100644 --- a/src/mem/protocol/MI_example-dir.sm +++ b/src/mem/protocol/MI_example-dir.sm @@ -21,7 +21,8 @@ machine(Directory, "Directory protocol") M_DRD, desc="Blocked on an invalidation for a DMA read"; M_DWR, desc="Blocked on an invalidation for a DMA write"; - M_DWRI, desc="Intermediate state M_DWR-->I"; + M_DWRI, desc="Intermediate state M_DWR-->I"; + M_DRDI, desc="Intermediate state M_DRD-->I"; IM, desc="Intermediate state I-->M"; MI, desc="Intermediate state M-->I"; @@ -306,11 +307,11 @@ machine(Directory, "Directory protocol") action(inv_sendCacheInvalidate, "inv", desc="Invalidate a cache block") { peek(dmaRequestQueue_in, DMARequestMsg) { enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) { - out_msg.Address := address; - out_msg.Type := CoherenceRequestType:INV; - out_msg.Requestor := machineID; - out_msg.Destination := directory[in_msg.PhysicalAddress].Owner; - out_msg.MessageSize := MessageSizeType:Writeback_Control; + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:INV; + out_msg.Requestor := machineID; + out_msg.Destination := directory[in_msg.PhysicalAddress].Owner; + out_msg.MessageSize := MessageSizeType:Writeback_Control; } } } @@ -323,16 +324,15 @@ machine(Directory, "Directory protocol") dmaRequestQueue_in.dequeue(); } - action(l_writeDataToMemory, "l", desc="Write PUTX data to memory") { + action(l_writeDataToMemory, "pl", desc="Write PUTX data to memory") { peek(requestQueue_in, RequestMsg) { // assert(in_msg.Dirty); // assert(in_msg.MessageSize == MessageSizeType:Writeback_Data); directory[in_msg.Address].DataBlk := in_msg.DataBlk; - DEBUG_EXPR(in_msg.Address); - DEBUG_EXPR(in_msg.DataBlk); + //directory[in_msg.Address].DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.Address), in_msg.Len); } } - + action(dwt_writeDMADataFromTBE, "dwt", desc="DMA Write data to memory from TBE") { directory[address].DataBlk.copyPartial(TBEs[address].DataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); } @@ -416,7 +416,8 @@ machine(Directory, "Directory protocol") out_msg.Address := address; out_msg.Type := MemoryRequestType:MEMORY_WB; out_msg.OriginalRequestorMachId := in_msg.Requestor; - //out_msg.DataBlk := in_msg.DataBlk; + // get incoming data + // out_msg.DataBlk := in_msg.DataBlk; out_msg.DataBlk.copyPartial(TBEs[address].DataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); out_msg.MessageSize := in_msg.MessageSize; //out_msg.Prefetch := in_msg.Prefetch; @@ -448,23 +449,26 @@ machine(Directory, "Directory protocol") } action(w_writeDataToMemoryFromTBE, "\w", desc="Write date to directory memory from TBE") { - directory[address].DataBlk := TBEs[address].DataBlk; + //directory[address].DataBlk := TBEs[address].DataBlk; + directory[address].DataBlk.copyPartial(TBEs[address].DataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + } // TRANSITIONS - transition({M_DRD, M_DWR, M_DWRI}, GETX) { + transition({M_DRD, M_DWR, M_DWRI, M_DRDI}, GETX) { z_recycleRequestQueue; } transition({IM, MI, ID, ID_W}, {GETX, GETS, PUTX, PUTX_NotOwner} ) { z_recycleRequestQueue; } - + transition({IM, MI, ID, ID_W}, {DMA_READ, DMA_WRITE} ) { y_recycleDMARequestQueue; } + transition(I, GETX, IM) { //d_sendData; qf_queueMemoryFetchRequest; @@ -507,18 +511,27 @@ machine(Directory, "Directory protocol") } transition(M, DMA_READ, M_DRD) { + v_allocateTBE; inv_sendCacheInvalidate; p_popIncomingDMARequestQueue; } - transition(M_DRD, PUTX, I) { + transition(M_DRD, PUTX, M_DRDI) { + l_writeDataToMemory; drp_sendDMAData; c_clearOwner; - a_sendWriteBackAck; - d_deallocateDirectory; + l_queueMemoryWBRequest; i_popIncomingRequestQueue; } + transition(M_DRDI, Memory_Ack, I) { + l_sendWriteBackAck; + w_deallocateTBE; + d_deallocateDirectory; + l_popMemQueue; + } + + transition(M, DMA_WRITE, M_DWR) { v_allocateTBE; inv_sendCacheInvalidate; @@ -526,6 +539,7 @@ machine(Directory, "Directory protocol") } transition(M_DWR, PUTX, M_DWRI) { + l_writeDataToMemory; qw_queueMemoryWBRequest_partialTBE; c_clearOwner; i_popIncomingRequestQueue; @@ -547,6 +561,7 @@ machine(Directory, "Directory protocol") } transition(M, PUTX, MI) { + l_writeDataToMemory; c_clearOwner; v_allocateTBEFromRequestNet; l_queueMemoryWBRequest; diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm index 28800b2bd..db2efd3e7 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -90,6 +90,7 @@ machine(L1Cache, "Directory protocol") Own_GETX, desc="We observe our own GetX forwarded back to us"; Fwd_GETX, desc="A GetX from another processor"; Fwd_GETS, desc="A GetS from another processor"; + Fwd_DMA, desc="A GetS from another processor"; Inv, desc="Invalidations from the directory"; // Responses @@ -309,7 +310,7 @@ machine(L1Cache, "Directory protocol") assert(in_msg.Destination.isElement(machineID)); DEBUG_EXPR("MRM_DEBUG: L1 received"); DEBUG_EXPR(in_msg.Type); -if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestType:DMA_READ || in_msg.Type == CoherenceRequestType:DMA_WRITE) { +if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestType:DMA_WRITE) { if (in_msg.Requestor == machineID && in_msg.RequestorMachine == MachineType:L1Cache) { trigger(Event:Own_GETX, in_msg.Address); } else { @@ -317,6 +318,8 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT } } else if (in_msg.Type == CoherenceRequestType:GETS) { trigger(Event:Fwd_GETS, in_msg.Address); + } else if (in_msg.Type == CoherenceRequestType:DMA_READ) { + trigger(Event:Fwd_DMA, in_msg.Address); } else if (in_msg.Type == CoherenceRequestType:WB_ACK) { trigger(Event:Writeback_Ack, in_msg.Address); } else if (in_msg.Type == CoherenceRequestType:WB_ACK_DATA) { @@ -826,7 +829,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT zz_recycleMandatoryQueue; } - transition({M_W, MM_W}, {Fwd_GETS, Fwd_GETX, Own_GETX, Inv}) { + transition({M_W, MM_W}, {Fwd_GETS, Fwd_DMA, Fwd_GETX, Own_GETX, Inv}) { z_recycleRequestQueue; } @@ -892,7 +895,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT l_popForwardQueue; } - transition(S, Fwd_GETS) { + transition(S, {Fwd_GETS, Fwd_DMA}) { e_sendData; l_popForwardQueue; } @@ -921,7 +924,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT l_popForwardQueue; } - transition(O, Fwd_GETS) { + transition(O, {Fwd_GETS, Fwd_DMA}) { e_sendData; l_popForwardQueue; } @@ -953,6 +956,12 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT l_popForwardQueue; } + transition(MM, Fwd_DMA, MM) { + //ee_sendDataExclusive; + e_sendData; + l_popForwardQueue; + } + // Transitions from M transition({M, M_W}, {Load, Ifetch}) { h_load_hit; @@ -986,6 +995,11 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT l_popForwardQueue; } + transition(M, Fwd_DMA, M) { + e_sendData; + l_popForwardQueue; + } + // Transitions from IM transition(IM, Inv) { @@ -1025,7 +1039,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT n_popResponseQueue; } - transition(SM, Fwd_GETS) { + transition(SM, {Fwd_DMA, Fwd_GETS}) { e_sendData; l_popForwardQueue; } @@ -1044,7 +1058,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT l_popForwardQueue; } - transition(OM, Fwd_GETS, OM) { + transition(OM, {Fwd_DMA, Fwd_GETS}, OM) { e_sendData; l_popForwardQueue; } @@ -1105,12 +1119,17 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT l_popForwardQueue; } + transition(MI, Fwd_DMA, MI) { + q_sendDataFromTBEToCache; + l_popForwardQueue; + } + transition(MI, Fwd_GETX, II) { q_sendExclusiveDataFromTBEToCache; l_popForwardQueue; } - transition({SI, OI}, Fwd_GETS) { + transition({SI, OI}, {Fwd_DMA, Fwd_GETS}) { q_sendDataFromTBEToCache; l_popForwardQueue; } diff --git a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm index 68d3a2cd3..9ee909199 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm @@ -138,6 +138,7 @@ machine(L2Cache, "Token protocol") L1_PUTS, desc="local sharer wants to writeback"; Fwd_GETX, desc="A GetX from another processor"; Fwd_GETS, desc="A GetS from another processor"; + Fwd_DMA, desc="A request from DMA"; Own_GETX, desc="A GetX from this node"; Inv, desc="Invalidations from the directory"; @@ -584,7 +585,7 @@ machine(L2Cache, "Token protocol") in_port(requestNetwork_in, RequestMsg, GlobalRequestToL2Cache) { if (requestNetwork_in.isReady()) { peek(requestNetwork_in, RequestMsg) { - if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestType:DMA_READ || in_msg.Type == CoherenceRequestType:DMA_WRITE) { + if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestType:DMA_WRITE) { if (in_msg.Requestor == machineID) { trigger(Event:Own_GETX, in_msg.Address); } else { @@ -592,6 +593,8 @@ machine(L2Cache, "Token protocol") } } else if (in_msg.Type == CoherenceRequestType:GETS) { trigger(Event:Fwd_GETS, in_msg.Address); + } else if(in_msg.Type == CoherenceRequestType:DMA_READ) { + trigger(Event:Fwd_DMA, in_msg.Address); } else if (in_msg.Type == CoherenceRequestType:INV) { trigger(Event:Inv, in_msg.Address); } else if (in_msg.Type == CoherenceRequestType:WB_ACK) { @@ -1456,7 +1459,15 @@ machine(L2Cache, "Token protocol") zz_recycleResponseQueue; } - transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS}, {Fwd_GETX, Fwd_GETS, Inv}) { + transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) { + zz_recycleRequestQueue; + } + + transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS}, {Inv}) { + zz_recycleRequestQueue; + } + + transition({IGM, IGS}, {Own_GETX}) { zz_recycleRequestQueue; } @@ -1521,6 +1532,27 @@ machine(L2Cache, "Token protocol") m_popRequestQueue; } + transition({ILOS, ILOSX}, Fwd_DMA) { + i_allocateTBE; + t_recordFwdSID; + j_forwardGlobalRequestToLocalOwner; + m_popRequestQueue; + } + + transition({ILO, ILX, ILOX}, Fwd_DMA) { + i_allocateTBE; + t_recordFwdSID; + j_forwardGlobalRequestToLocalOwner; + m_popRequestQueue; + } + + transition({ILOS, ILOSX, ILO, ILX, ILOX, ILXW}, Data) { + i_copyDataToTBE; + c_sendDataFromTBEToFwdGETS; + s_deallocateTBE; + n_popResponseQueue; + } + transition(IFGS, Data, ILO) { i_copyDataToTBE; c_sendDataFromTBEToFwdGETS; @@ -1598,7 +1630,7 @@ machine(L2Cache, "Token protocol") m_popRequestQueue; } - transition({O, OLS}, Fwd_GETS) { + transition({O, OLS}, {Fwd_GETS, Fwd_DMA}) { dd_sendDataToFwdGETS; m_popRequestQueue; } @@ -1609,6 +1641,10 @@ machine(L2Cache, "Token protocol") m_popRequestQueue; } + transition(OLSX, Fwd_DMA) { + dd_sendDataToFwdGETS; + m_popRequestQueue; + } transition(M, Fwd_GETX, I) { dd_sendDataToFwdGETX; @@ -1629,6 +1665,10 @@ machine(L2Cache, "Token protocol") m_popRequestQueue; } + transition(M, Fwd_DMA) { + dd_sendExclusiveDataToFwdGETS; + m_popRequestQueue; + } transition({OLS, OLSX}, Fwd_GETX, OLSF) { i_allocateTBE; @@ -1837,8 +1877,6 @@ machine(L2Cache, "Token protocol") n_popResponseQueue; } - - // LOCAL REQUESTS THAT MUST ISSUE transition(NP, {L1_PUTS, L1_PUTX, L1_PUTO}) { @@ -1949,7 +1987,7 @@ machine(L2Cache, "Token protocol") o_popL1RequestQueue; } - transition(OGMIO, Fwd_GETS) { + transition(OGMIO, {Fwd_GETS, Fwd_DMA}) { t_recordFwdSID; c_sendDataFromTBEToFwdGETS; m_popRequestQueue; @@ -1984,6 +2022,12 @@ machine(L2Cache, "Token protocol") m_popRequestQueue; } + transition(IGMIO, Fwd_DMA) { + t_recordFwdSID; + j_forwardGlobalRequestToLocalOwner; + m_popRequestQueue; + } + transition(IGMIOFS, Data, IGMIO) { i_copyDataToTBE; c_sendDataFromTBEToFwdGETS; @@ -2163,7 +2207,7 @@ machine(L2Cache, "Token protocol") } - transition(IGMO, Fwd_GETS) { + transition(IGMO, {Fwd_GETS, Fwd_DMA}) { t_recordFwdSID; c_sendDataFromTBEToFwdGETS; m_popRequestQueue; @@ -2518,13 +2562,13 @@ machine(L2Cache, "Token protocol") n_popTriggerQueue; } - transition(OLSI, Fwd_GETS) { + transition(OLSI, {Fwd_GETS, Fwd_DMA}) { t_recordFwdSID; c_sendDataFromTBEToFwdGETS; m_popRequestQueue; } - transition({MI, OI}, Fwd_GETS, OI) { + transition({MI, OI}, {Fwd_GETS, Fwd_DMA}, OI) { t_recordFwdSID; c_sendDataFromTBEToFwdGETS; m_popRequestQueue; diff --git a/src/mem/protocol/MOESI_CMP_directory-dir.sm b/src/mem/protocol/MOESI_CMP_directory-dir.sm index f1fb687bb..8e48fc9ab 100644 --- a/src/mem/protocol/MOESI_CMP_directory-dir.sm +++ b/src/mem/protocol/MOESI_CMP_directory-dir.sm @@ -331,6 +331,23 @@ machine(Directory, "Directory protocol") } } + action(p_fwdDataToDMA, "\d", desc="Send data to requestor") { + peek(requestQueue_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency="1") { + out_msg.Address := address; + out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:Directory; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := directory[in_msg.Address].DataBlk; + out_msg.Dirty := false; // By definition, the block is now clean + out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + + action(e_ownerIsUnblocker, "e", desc="The owner is now the unblocker") { peek(unblockNetwork_in, ResponseMsg) { directory[address].Owner.clear(); @@ -592,7 +609,7 @@ machine(Directory, "Directory protocol") i_popIncomingRequestQueue; } - transition(XI_M, Memory_Data, XI_U) { + transition(XI_M, Memory_Data, I) { d_sendDataMsg; // ack count may be zero q_popMemQueue; } @@ -609,9 +626,10 @@ machine(Directory, "Directory protocol") i_popIncomingRequestQueue; } - transition(S, DMA_READ, XI_M) { - qf_queueMemoryFetchRequest; - g_sendInvalidations; // the DMA will collect the invalidations then send an Unblock Exclusive + transition(S, DMA_READ, S) { + //qf_queueMemoryFetchRequest; + p_fwdDataToDMA; + //g_sendInvalidations; // the DMA will collect the invalidations then send an Unblock Exclusive i_popIncomingRequestQueue; } @@ -650,9 +668,9 @@ machine(Directory, "Directory protocol") i_popIncomingRequestQueue; } - transition(O, DMA_READ, XI_U) { + transition(O, DMA_READ, O) { f_forwardRequest; // this will cause the data to go to DMA directly - g_sendInvalidations; // this will cause acks to be sent to the DMA + //g_sendInvalidations; // this will cause acks to be sent to the DMA i_popIncomingRequestQueue; } @@ -684,7 +702,7 @@ machine(Directory, "Directory protocol") } // no exclusive unblock will show up to the directory - transition(M, DMA_READ, XI_U) { + transition(M, DMA_READ, M) { f_forwardRequest; // this will cause the data to go to DMA directly i_popIncomingRequestQueue; } diff --git a/src/mem/protocol/MOESI_CMP_directory-dma.sm b/src/mem/protocol/MOESI_CMP_directory-dma.sm index ae86e24da..6105778bd 100644 --- a/src/mem/protocol/MOESI_CMP_directory-dma.sm +++ b/src/mem/protocol/MOESI_CMP_directory-dma.sm @@ -227,16 +227,18 @@ machine(DMA, "DMA Controller") p_popResponseQueue; } - transition(BUSY_RD, Data) { + transition(BUSY_RD, Data, READY) { t_updateTBEData; - u_updateAckCount; - o_checkForCompletion; + d_dataCallbackFromTBE; + w_deallocateTBE; + //u_updateAckCount; + //o_checkForCompletion; p_popResponseQueue; } transition(BUSY_RD, All_Acks, READY) { d_dataCallbackFromTBE; - u_sendExclusiveUnblockToDir; + //u_sendExclusiveUnblockToDir; w_deallocateTBE; p_popTriggerQueue; } diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh index ccd73c36b..3c8ef56f4 100644 --- a/src/mem/ruby/common/DataBlock.hh +++ b/src/mem/ruby/common/DataBlock.hh @@ -110,6 +110,7 @@ void DataBlock::print(ostream& out) const out << "[ "; for (int i = 0; i < size; i++) { out << setw(2) << setfill('0') << hex << "0x" << (int)m_data[i] << " "; + out << setfill(' '); } out << dec << "]" << flush; } diff --git a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb index aa176d0f1..e20b7249e 100644 --- a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb +++ b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb @@ -26,7 +26,7 @@ num_memories = 1 memory_size_mb = 1024 num_dma = 1 -protocol = "MOESI_CMP_directory" +protocol = "MESI_CMP_directory" # check for overrides @@ -56,7 +56,7 @@ end net_ports = Array.new iface_ports = Array.new -assert(protocol == "MOESI_CMP_directory", __FILE__+" cannot be used with protocol "+protocol); +#assert(protocol == "MESI_CMP_directory", __FILE__+" cannot be used with protocol "+protocol); require protocol+".rb" @@ -72,6 +72,14 @@ num_cores.times { |n| sequencer, num_l2_banks) end + + if protocol == "MESI_CMP_directory" + net_ports << MESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s, + "L1Cache", + icache, dcache, + sequencer, + num_l2_banks) + end } num_l2_banks.times { |n| cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_kb/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU") @@ -82,6 +90,14 @@ num_l2_banks.times { |n| net_ports.last.request_latency = l2_cache_latency + 2 net_ports.last.response_latency = l2_cache_latency + 2 end + + if protocol == "MESI_CMP_directory" + net_ports << MESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s, + "L2Cache", + cache) + end + + } num_memories.times { |n| directory = DirectoryMemory.new("DirectoryMemory_"+n.to_s, memory_size_mb/num_memories) @@ -92,6 +108,14 @@ num_memories.times { |n| directory, memory_control) end + + if protocol == "MESI_CMP_directory" + net_ports << MESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s, + "Directory", + directory, + memory_control) + end + } num_dma.times { |n| dma_sequencer = DMASequencer.new("DMASequencer_"+n.to_s) @@ -101,6 +125,14 @@ num_dma.times { |n| "DMA", dma_sequencer) end + + if protocol == "MESI_CMP_directory" + net_ports << MESI_CMP_directory_DMAController.new("DMAController_"+n.to_s, + "DMA", + dma_sequencer) + end + + } topology = CrossbarTopology.new("theTopology", net_ports) diff --git a/src/mem/ruby/config/cfg.rb b/src/mem/ruby/config/cfg.rb index 16d658a4c..e6bc1b831 100644 --- a/src/mem/ruby/config/cfg.rb +++ b/src/mem/ruby/config/cfg.rb @@ -296,7 +296,6 @@ private end -#require "defaults.rb" @@ -439,27 +438,27 @@ class SetAssociativeCache < Cache cacti_args << 360 << 0 << 0 << 0 << 0 << 1 << 1 << 1 << 1 << 0 << 0 cacti_args << 50 << 10 << 10 << 0 << 1 << 1 - cacti_cmd = File.dirname(__FILE__) + "/cacti/cacti " + cacti_args.join(" ") - - IO.popen(cacti_cmd) { |pipe| - str1 = pipe.readline - str2 = pipe.readline - results = str2.split(", ") - if results.size != 61 - print "CACTI ERROR: CACTI produced unexpected output.\n" - print "Are you using the version shipped with libruby?\n" - raise Exception - end - latency_ns = results[5].to_f - if (latency_ns == "1e+39") - print "CACTI ERROR: CACTI was unable to realistically model the cache ",@obj_name,"\n" - print "Either change the cache parameters or manually set the latency values\n" - raise Exception - end - clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6)) - latency_cycles = (latency_ns / clk_period_ns).ceil - @latency = latency_cycles - } +# cacti_cmd = File.dirname(__FILE__) + "/cacti/cacti " + cacti_args.join(" ") + +# IO.popen(cacti_cmd) { |pipe| +# str1 = pipe.readline +# str2 = pipe.readline +# results = str2.split(", ") +# if results.size != 61 +# print "CACTI ERROR: CACTI produced unexpected output.\n" +# print "Are you using the version shipped with libruby?\n" +# raise Exception +# end +# latency_ns = results[5].to_f +# if (latency_ns == "1e+39") +# print "CACTI ERROR: CACTI was unable to realistically model the cache ",@obj_name,"\n" +# print "Either change the cache parameters or manually set the latency values\n" +# raise Exception +# end +# clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6)) +# latency_cycles = (latency_ns / clk_period_ns).ceil +# @latency = latency_cycles +# } elsif @latency.is_a?(Float) clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6)) latency_cycles = (@latency / clk_period_ns).ceil @@ -540,7 +539,6 @@ class MemoryControl < LibRubyObject end - class Sequencer < IfacePort def cppClassName() @@ -759,4 +757,5 @@ class GarnetFlexiblePipeline < GarnetNetwork end end +#added by SS require "defaults.rb" diff --git a/src/mem/ruby/config/defaults.rb b/src/mem/ruby/config/defaults.rb index ab717cece..68617d76b 100644 --- a/src/mem/ruby/config/defaults.rb +++ b/src/mem/ruby/config/defaults.rb @@ -9,7 +9,7 @@ class NetPort < LibRubyObject # buffer_size limits the size of all other buffers connecting to # SLICC Controllers. When 0, infinite buffering is used. - default_param :buffer_size, Integer, 32 + default_param :buffer_size, Integer, 0 # added by SS for TBE default_param :number_of_TBEs, Integer, 256 @@ -36,7 +36,7 @@ class Debug < LibRubyObject # 1. change protocol_trace = true # 2. enable debug in the Ruby Makefile # 3. set start_time = 1 - default_param :protocol_trace, Boolean, false + default_param :protocol_trace, Boolean, true # a string for filtering debugging output (for all g_debug vars see Debug.h) default_param :filter_string, String, "none" @@ -46,9 +46,9 @@ class Debug < LibRubyObject # filters debugging messages based on a ruby time default_param :start_time, Integer, 1 - + # sends debugging messages to a output filename - default_param :output_filename, String, "none" + default_param :output_filename, String, "debug_ss" end class Topology < LibRubyObject @@ -71,11 +71,11 @@ end class Network < LibRubyObject default_param :endpoint_bandwidth, Integer, 10000 default_param :adaptive_routing, Boolean, true - default_param :number_of_virtual_networks, Integer, 6 + default_param :number_of_virtual_networks, Integer, 10 default_param :fan_out_degree, Integer, 4 # default buffer size. Setting to 0 indicates infinite buffering - default_param :buffer_size, Integer, 3 + default_param :buffer_size, Integer, 0 # local memory latency ?? NetworkLinkLatency default_param :link_latency, Integer, 1 @@ -204,4 +204,31 @@ class RubySystem default_param :profiler, Profiler, Profiler.new("profiler0") end +#added by SS + +class MESI_CMP_directory_L2CacheController < CacheController + default_param :l2_request_latency, Integer, 2 + default_param :l2_response_latency, Integer, 2 + default_param :to_L1_latency, Integer, 1 + +#if 0 then automatically calculated + default_param :lowest_bit, Integer, 0 + default_param :highest_bit, Integer, 0 +end + +class MESI_CMP_directory_L1CacheController < L1CacheController + default_param :l1_request_latency, Integer, 2 + default_param :l1_response_latency, Integer, 2 + default_param :to_L2_latency, Integer, 1 +end + + +class MESI_CMP_directory_DirectoryController < DirectoryController + default_param :to_mem_ctrl_latency, Integer, 1 + default_param :directory_latency, Integer, 6 +end + +class MESI_CMP_directory_DMAController < DMAController + default_param :request_latency, Integer, 6 +end |