diff options
Diffstat (limited to 'src/mem')
-rw-r--r-- | src/mem/protocol/MESI_CMP_directory-L1cache.sm | 363 | ||||
-rw-r--r-- | src/mem/protocol/RubySlicc_Types.sm | 7 | ||||
-rw-r--r-- | src/mem/slicc/ast/ObjDeclAST.py | 3 | ||||
-rw-r--r-- | src/mem/slicc/symbols/StateMachine.py | 12 |
4 files changed, 375 insertions, 10 deletions
diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm index d12e44ba3..bcfb20297 100644 --- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm @@ -31,11 +31,13 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") : Sequencer * sequencer, CacheMemory * L1IcacheMemory, CacheMemory * L1DcacheMemory, + Prefetcher * prefetcher = 'NULL', int l2_select_num_bits, int l1_request_latency = 2, int l1_response_latency = 2, int to_l2_latency = 1, - bool send_evictions + bool send_evictions, + bool enable_prefetch = "False" { // NODE L1 CACHE // From this node's L1 cache TO the network @@ -51,6 +53,9 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false", vnet_type="request"; // a L2 bank -> this L1 MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false", vnet_type="response"; + // Request Buffer for prefetches + MessageBuffer optionalQueue, ordered="false"; + // STATES state_declaration(State, desc="Cache states", default="L1Cache_State_I") { @@ -70,6 +75,11 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") M_I, AccessPermission:Busy, desc="L1 replacing, waiting for ACK"; SINK_WB_ACK, AccessPermission:Busy, desc="This is to sink WB_Acks from L2"; + // Transient States in which block is being prefetched + PF_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet"; + PF_IM, AccessPermission:Busy, desc="Issued GETX, have not seen response yet"; + PF_SM, AccessPermission:Busy, desc="Issued GETX, received data, waiting for acks"; + PF_IS_I, AccessPermission:Busy, desc="Issued GETs, saw inv before data"; } // EVENTS @@ -98,6 +108,10 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") Ack_all, desc="Last ack for processor"; WB_Ack, desc="Ack for replacement"; + + PF_Load, desc="load request from prefetcher"; + PF_Ifetch, desc="instruction fetch request from prefetcher"; + PF_Store, desc="exclusive load request from prefetcher"; } // TYPES @@ -107,6 +121,7 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") State CacheState, desc="cache state"; DataBlock DataBlk, desc="data for the block"; bool Dirty, default="false", desc="data is dirty"; + bool isPrefetch, desc="Set if this block was prefetched"; } // TBE fields @@ -227,6 +242,19 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } } + Event prefetch_request_type_to_event(RubyRequestType type) { + if (type == RubyRequestType:LD) { + return Event:PF_Load; + } else if (type == RubyRequestType:IFETCH) { + return Event:PF_Ifetch; + } else if ((type == RubyRequestType:ST) || + (type == RubyRequestType:ATOMIC)) { + return Event:PF_Store; + } else { + error("Invalid RubyRequestType"); + } + } + int getPendingAcks(TBE tbe) { return tbe.pendingAcks; } @@ -234,6 +262,89 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") out_port(requestIntraChipL1Network_out, RequestMsg, requestFromL1Cache); out_port(responseIntraChipL1Network_out, ResponseMsg, responseFromL1Cache); out_port(unblockNetwork_out, ResponseMsg, unblockFromL1Cache); + out_port(optionalQueue_out, RubyRequest, optionalQueue); + + + // Prefetch queue between the controller and the prefetcher + // As per Spracklen et al. (HPCA 2005), the prefetch queue should be + // implemented as a LIFO structure. The structure would allow for fast + // searches of all entries in the queue, not just the head msg. All + // msgs in the structure can be invalidated if a demand miss matches. + in_port(optionalQueue_in, RubyRequest, optionalQueue, desc="...", rank = 3) { + if (optionalQueue_in.isReady()) { + peek(optionalQueue_in, RubyRequest) { + // Instruction Prefetch + if (in_msg.Type == RubyRequestType:IFETCH) { + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block to be prefetched is already present in the + // cache. We should drop this request. + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + L1Icache_entry, L1_TBEs[in_msg.LineAddress]); + } + + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1 cache. We should drop + // this request. + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); + } + + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { + // L1 does't have the line, but we have space for it + // in the L1 so let's see if the L2 has it + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + L1Icache_entry, L1_TBEs[in_msg.LineAddress]); + } else { + // No room in the L1, so we need to make room in the L1 + trigger(Event:L1_Replacement, + L1IcacheMemory.cacheProbe(in_msg.LineAddress), + getL1ICacheEntry(L1IcacheMemory.cacheProbe(in_msg.LineAddress)), + L1_TBEs[L1IcacheMemory.cacheProbe(in_msg.LineAddress)]); + } + } else { + // Data prefetch + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block to be prefetched is already present in the + // cache. We should drop this request. + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); + } + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1. Just drop the prefetch + // request. + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + L1Icache_entry, L1_TBEs[in_msg.LineAddress]); + } + + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { + // L1 does't have the line, but we have space for it in + // the L1 let's see if the L2 has it + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); + } else { + // No room in the L1, so we need to make room in the L1 + trigger(Event:L1_Replacement, + L1DcacheMemory.cacheProbe(in_msg.LineAddress), + getL1DCacheEntry(L1DcacheMemory.cacheProbe(in_msg.LineAddress)), + L1_TBEs[L1DcacheMemory.cacheProbe(in_msg.LineAddress)]); + } + } + } + } + } // Response IntraChip L1 Network - response msg to this L1 cache in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache, rank = 2) { @@ -248,7 +359,9 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") trigger(Event:Data_Exclusive, in_msg.Address, cache_entry, tbe); } else if(in_msg.Type == CoherenceResponseType:DATA) { if ((getState(tbe, cache_entry, in_msg.Address) == State:IS || - getState(tbe, cache_entry, in_msg.Address) == State:IS_I) && + getState(tbe, cache_entry, in_msg.Address) == State:IS_I || + getState(tbe, cache_entry, in_msg.Address) == State:PF_IS || + getState(tbe, cache_entry, in_msg.Address) == State:PF_IS_I) && machineIDToMachineType(in_msg.Sender) == MachineType:L1Cache) { trigger(Event:DataS_fromL1, in_msg.Address, cache_entry, tbe); @@ -368,6 +481,14 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } } + void enqueuePrefetch(Address address, RubyRequestType type) { + enqueue(optionalQueue_out, RubyRequest, latency=1) { + out_msg.LineAddress := address; + out_msg.Type := type; + out_msg.AccessMode := RubyAccessMode:Supervisor; + } + } + // ACTIONS action(a_issueGETS, "a", desc="Issue GETS") { peek(mandatoryQueue_in, RubyRequest) { @@ -386,6 +507,24 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } } + action(pa_issuePfGETS, "pa", desc="Issue prefetch GETS") { + peek(optionalQueue_in, RubyRequest) { + enqueue(requestIntraChipL1Network_out, RequestMsg, + latency=l1_request_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETS; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Prefetch := in_msg.Prefetch; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + action(ai_issueGETINSTR, "ai", desc="Issue GETINSTR") { peek(mandatoryQueue_in, RubyRequest) { enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) { @@ -403,6 +542,26 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } } + action(pai_issuePfGETINSTR, "pai", + desc="Issue GETINSTR for prefetch request") { + peek(optionalQueue_in, RubyRequest) { + enqueue(requestIntraChipL1Network_out, RequestMsg, + latency=l1_request_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GET_INSTR; + out_msg.Requestor := machineID; + out_msg.Destination.add( + mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Prefetch := in_msg.Prefetch; + out_msg.AccessMode := in_msg.AccessMode; + + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + } + } + } action(b_issueGETX, "b", desc="Issue GETX") { peek(mandatoryQueue_in, RubyRequest) { @@ -422,6 +581,29 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } } + action(pb_issuePfGETX, "pb", desc="Issue prefetch GETX") { + peek(optionalQueue_in, RubyRequest) { + enqueue(requestIntraChipL1Network_out, RequestMsg, + latency=l1_request_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETX; + out_msg.Requestor := machineID; + DPRINTF(RubySlicc, "%s\n", machineID); + + out_msg.Destination.add(mapAddressToRange(address, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits)); + + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Prefetch := in_msg.Prefetch; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + action(c_issueUPGRADE, "c", desc="Issue GETX") { peek(mandatoryQueue_in, RubyRequest) { enqueue(requestIntraChipL1Network_out, RequestMsg, latency= l1_request_latency) { @@ -584,7 +766,6 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; DPRINTF(RubySlicc, "%s\n", address); - } } @@ -700,6 +881,31 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") } } + action(po_observeMiss, "\po", desc="Inform the prefetcher about the miss") { + peek(mandatoryQueue_in, RubyRequest) { + if (enable_prefetch) { + prefetcher.observeMiss(in_msg.LineAddress, in_msg.Type); + } + } + } + + action(ppm_observePfMiss, "\ppm", + desc="Inform the prefetcher about the partial miss") { + peek(mandatoryQueue_in, RubyRequest) { + prefetcher.observePfMiss(in_msg.LineAddress); + } + } + + action(pq_popPrefetchQueue, "\pq", desc="Pop the prefetch request queue") { + optionalQueue_in.dequeue(); + } + + action(mp_markPrefetched, "mp", desc="Write data from response queue to cache") { + assert(is_valid(cache_entry)); + cache_entry.isPrefetch := true; + } + + //***************************************************** // TRANSITIONS //***************************************************** @@ -709,16 +915,49 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") z_stallAndWaitMandatoryQueue; } + transition({PF_IS, PF_IS_I}, {Store, L1_Replacement}) { + z_stallAndWaitMandatoryQueue; + } + + transition({PF_IM, PF_SM}, {Load, Ifetch, L1_Replacement}) { + z_stallAndWaitMandatoryQueue; + } + // Transitions from Idle transition({NP,I}, L1_Replacement) { ff_deallocateL1CacheBlock; } + transition({S,E,M,IS,IM,SM,IS_I,M_I,SINK_WB_ACK,PF_IS,PF_IM}, + {PF_Load, PF_Store}) { + pq_popPrefetchQueue; + } + transition({NP,I}, Load, IS) { oo_allocateL1DCacheBlock; i_allocateTBE; a_issueGETS; uu_profileDataMiss; + po_observeMiss; + k_popMandatoryQueue; + } + + transition({NP,I}, PF_Load, PF_IS) { + oo_allocateL1DCacheBlock; + i_allocateTBE; + pa_issuePfGETS; + pq_popPrefetchQueue; + } + + transition(PF_IS, Load, IS) { + uu_profileDataMiss; + ppm_observePfMiss; + k_popMandatoryQueue; + } + + transition(PF_IS_I, Load, IS_I) { + uu_profileDataMiss; + ppm_observePfMiss; k_popMandatoryQueue; } @@ -727,6 +966,22 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") i_allocateTBE; ai_issueGETINSTR; uu_profileInstMiss; + po_observeMiss; + k_popMandatoryQueue; + } + + transition({NP,I}, PF_Ifetch, PF_IS) { + pp_allocateL1ICacheBlock; + i_allocateTBE; + pai_issuePfGETINSTR; + pq_popPrefetchQueue; + } + + // We proactively assume that the prefetch is in to + // the instruction cache + transition(PF_IS, Ifetch, IS) { + uu_profileDataMiss; + ppm_observePfMiss; k_popMandatoryQueue; } @@ -735,6 +990,26 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") i_allocateTBE; b_issueGETX; uu_profileDataMiss; + po_observeMiss; + k_popMandatoryQueue; + } + + transition({NP,I}, PF_Store, PF_IM) { + oo_allocateL1DCacheBlock; + i_allocateTBE; + pb_issuePfGETX; + pq_popPrefetchQueue; + } + + transition(PF_IM, Store, IM) { + uu_profileDataMiss; + ppm_observePfMiss; + k_popMandatoryQueue; + } + + transition(PF_SM, Store, SM) { + uu_profileDataMiss; + ppm_observePfMiss; k_popMandatoryQueue; } @@ -870,6 +1145,11 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") l_popRequestQueue; } + transition({PF_IS, PF_IS_I}, Inv, PF_IS_I) { + fi_sendInvAck; + l_popRequestQueue; + } + transition(IS, Data_all_Acks, S) { u_writeDataToL1Cache; h_load_hit; @@ -878,6 +1158,14 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } + transition(PF_IS, Data_all_Acks, S) { + u_writeDataToL1Cache; + s_deallocateTBE; + mp_markPrefetched; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + transition(IS_I, Data_all_Acks, I) { u_writeDataToL1Cache; h_load_hit; @@ -886,6 +1174,12 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } + transition(PF_IS_I, Data_all_Acks, I) { + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + transition(IS, DataS_fromL1, S) { u_writeDataToL1Cache; j_sendUnblock; @@ -895,6 +1189,14 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } + transition(PF_IS, DataS_fromL1, S) { + u_writeDataToL1Cache; + j_sendUnblock; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + transition(IS_I, DataS_fromL1, I) { u_writeDataToL1Cache; j_sendUnblock; @@ -904,6 +1206,13 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } + transition(PF_IS_I, DataS_fromL1, I) { + j_sendUnblock; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + // directory is blocked when sending exclusive data transition(IS_I, Data_Exclusive, E) { u_writeDataToL1Cache; @@ -914,6 +1223,15 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } + // directory is blocked when sending exclusive data + transition(PF_IS_I, Data_Exclusive, E) { + u_writeDataToL1Cache; + jj_sendExclusiveUnblock; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + transition(IS, Data_Exclusive, E) { u_writeDataToL1Cache; h_load_hit; @@ -923,18 +1241,38 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } + transition(PF_IS, Data_Exclusive, E) { + u_writeDataToL1Cache; + jj_sendExclusiveUnblock; + s_deallocateTBE; + mp_markPrefetched; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + // Transitions from IM transition({IM, SM}, Inv, IM) { fi_sendInvAck; l_popRequestQueue; } + transition({PF_IM, PF_SM}, Inv, PF_IM) { + fi_sendInvAck; + l_popRequestQueue; + } + transition(IM, Data, SM) { u_writeDataToL1Cache; q_updateAckCount; o_popIncomingResponseQueue; } + transition(PF_IM, Data, PF_SM) { + u_writeDataToL1Cache; + q_updateAckCount; + o_popIncomingResponseQueue; + } + transition(IM, Data_all_Acks, M) { u_writeDataToL1Cache; hh_store_hit; @@ -944,8 +1282,17 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } + transition(PF_IM, Data_all_Acks, M) { + u_writeDataToL1Cache; + jj_sendExclusiveUnblock; + s_deallocateTBE; + mp_markPrefetched; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + // transitions from SM - transition({SM, IM}, Ack) { + transition({SM, IM, PF_SM, PF_IM}, Ack) { q_updateAckCount; o_popIncomingResponseQueue; } @@ -958,6 +1305,14 @@ machine(L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } + transition(PF_SM, Ack_all, M) { + jj_sendExclusiveUnblock; + s_deallocateTBE; + mp_markPrefetched; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + transition(SINK_WB_ACK, Inv){ fi_sendInvAck; l_popRequestQueue; diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm index 9b18aa417..20249942a 100644 --- a/src/mem/protocol/RubySlicc_Types.sm +++ b/src/mem/protocol/RubySlicc_Types.sm @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2005 Mark D. Hill and David A. Wood * All rights reserved. @@ -188,3 +187,9 @@ structure (GenericBloomFilter, external = "yes") { bool isSet(Address, int); int getCount(Address, int); } + +structure (Prefetcher, external = "yes") { + void observeMiss(Address, RubyRequestType); + void observePfHit(Address); + void observePfMiss(Address); +} diff --git a/src/mem/slicc/ast/ObjDeclAST.py b/src/mem/slicc/ast/ObjDeclAST.py index 389098cd9..4509b4527 100644 --- a/src/mem/slicc/ast/ObjDeclAST.py +++ b/src/mem/slicc/ast/ObjDeclAST.py @@ -41,9 +41,6 @@ class ObjDeclAST(DeclAST): def generate(self): machineComponentSym = False - if "hack" in self: - warning("'hack=' is now deprecated") - if "network" in self and "virtual_network" not in self: self.error("Network queues require a 'virtual_network' attribute") diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index 1547f992b..6c85480ab 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -41,7 +41,8 @@ python_class_map = {"int": "Int", "Sequencer": "RubySequencer", "DirectoryMemory": "RubyDirectoryMemory", "MemoryControl": "MemoryControl", - "DMASequencer": "DMASequencer" + "DMASequencer": "DMASequencer", + "Prefetcher":"Prefetcher" } class StateMachine(Symbol): @@ -49,6 +50,7 @@ class StateMachine(Symbol): super(StateMachine, self).__init__(symtab, ident, location, pairs) self.table = None self.config_parameters = config_parameters + self.prefetchers = [] for param in config_parameters: if param.pointer: @@ -58,6 +60,8 @@ class StateMachine(Symbol): var = Var(symtab, param.name, location, param.type_ast.type, "m_%s" % param.name, {}, self) self.symtab.registerSym(param.name, var) + if str(param.type_ast.type) == "Prefetcher": + self.prefetchers.append(var) self.states = orderdict() self.events = orderdict() @@ -69,9 +73,9 @@ class StateMachine(Symbol): self.objects = [] self.TBEType = None self.EntryType = None - self.message_buffer_names = [] + def __repr__(self): return "[StateMachine: %s]" % self.ident @@ -629,6 +633,10 @@ $vid->setDescription("[Version " + to_string(m_version) + ", ${ident}, name=${{v else: code('$vid->setRecycleLatency(m_recycle_latency);') + # Set the prefetchers + code() + for prefetcher in self.prefetchers: + code('${{prefetcher.code}}.setController(this);') # Set the queue consumers code() |