diff options
-rw-r--r-- | src/mem/protocol/MI_example-cache.sm | 40 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_CMP_token-L1cache.sm | 66 | ||||
-rw-r--r-- | src/mem/protocol/MOESI_hammer-cache.sm | 83 | ||||
-rw-r--r-- | src/mem/protocol/RubySlicc_Types.sm | 2 | ||||
-rw-r--r-- | src/mem/ruby/profiler/Profiler.cc | 22 | ||||
-rw-r--r-- | src/mem/ruby/profiler/Profiler.hh | 12 | ||||
-rw-r--r-- | src/mem/ruby/system/Sequencer.cc | 26 | ||||
-rw-r--r-- | src/mem/ruby/system/Sequencer.hh | 14 |
8 files changed, 210 insertions, 55 deletions
diff --git a/src/mem/protocol/MI_example-cache.sm b/src/mem/protocol/MI_example-cache.sm index 0104e1aa2..84975ffd5 100644 --- a/src/mem/protocol/MI_example-cache.sm +++ b/src/mem/protocol/MI_example-cache.sm @@ -121,6 +121,17 @@ machine(L1Cache, "MI Example L1 Cache") } } + GenericMachineType getNondirectHitMachType(MachineID sender) { + if (machineIDToMachineType(sender) == MachineType:L1Cache) { + // + // NOTE direct local hits should not call this + // + return GenericMachineType:L1Cache_wCC; + } else { + return ConvertMachToGenericMach(machineIDToMachineType(sender)); + } + } + // NETWORK PORTS @@ -263,14 +274,35 @@ machine(L1Cache, "MI Example L1 Cache") action(r_load_hit, "r", desc="Notify sequencer the load completed.") { DEBUG_EXPR(getCacheEntry(address).DataBlk); - sequencer.readCallback(address, getCacheEntry(address).DataBlk); + sequencer.readCallback(address, + GenericMachineType:L1Cache, + getCacheEntry(address).DataBlk); + } + + action(rx_load_hit, "rx", desc="External load completed.") { + peek(responseNetwork_in, ResponseMsg) { + DEBUG_EXPR(getCacheEntry(address).DataBlk); + sequencer.readCallback(address, + getNondirectHitMachType(in_msg.Sender), + getCacheEntry(address).DataBlk); + } } action(s_store_hit, "s", desc="Notify sequencer that store completed.") { DEBUG_EXPR(getCacheEntry(address).DataBlk); - sequencer.writeCallback(address, getCacheEntry(address).DataBlk); + sequencer.writeCallback(address, + GenericMachineType:L1Cache, + getCacheEntry(address).DataBlk); } + action(sx_store_hit, "sx", desc="External store completed.") { + peek(responseNetwork_in, ResponseMsg) { + DEBUG_EXPR(getCacheEntry(address).DataBlk); + sequencer.writeCallback(address, + getNondirectHitMachType(in_msg.Sender), + getCacheEntry(address).DataBlk); + } + } action(u_writeDataToCache, "u", desc="Write data to the cache") { peek(responseNetwork_in, ResponseMsg) { @@ -342,14 +374,14 @@ machine(L1Cache, "MI Example L1 Cache") transition(IS, Data, M) { u_writeDataToCache; - r_load_hit; + rx_load_hit; w_deallocateTBE; n_popResponseQueue; } transition(IM, Data, M) { u_writeDataToCache; - s_store_hit; + sx_store_hit; w_deallocateTBE; n_popResponseQueue; } diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm index e3e3fa2cb..fa32b94ab 100644 --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -374,24 +374,27 @@ machine(L1Cache, "Token protocol") } } -// GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) { -// if (machineIDToMachineType(sender) == MachineType:L1Cache) { -// return GenericMachineType:L1Cache_wCC; // NOTE direct L1 hits should not call this -// } else if (machineIDToMachineType(sender) == MachineType:L2Cache) { -// -// if (sender == (mapAddressToRange(addr, -// MachineType:L2Cache, -// l2_select_low_bit, -// l2_select_num_bits))) { -// -// return GenericMachineType:L2Cache; -// } else { -// return GenericMachineType:L2Cache_wCC; -// } -// } else { -// return ConvertMachToGenericMach(machineIDToMachineType(sender)); -// } -// } + GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) { + if (machineIDToMachineType(sender) == MachineType:L1Cache) { + // + // NOTE direct local hits should not call this + // + return GenericMachineType:L1Cache_wCC; + } else if (machineIDToMachineType(sender) == MachineType:L2Cache) { + + if (sender == (mapAddressToRange(addr, + MachineType:L2Cache, + l2_select_low_bit, + l2_select_num_bits))) { + + return GenericMachineType:L2Cache; + } else { + return GenericMachineType:L2Cache_wCC; + } + } else { + return ConvertMachToGenericMach(machineIDToMachineType(sender)); + } + } bool okToIssueStarving(Address addr, MachineID machinID) { return persistentTable.okToIssueStarving(addr, machineID); @@ -1136,8 +1139,11 @@ machine(L1Cache, "Token protocol") action(h_load_hit, "h", desc="Notify sequencer the load completed.") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); - //sequencer.readCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); - sequencer.readCallback(address, getCacheEntry(address).DataBlk); + + sequencer.readCallback(address, + GenericMachineType:L1Cache, + getCacheEntry(address).DataBlk); + } action(x_external_load_hit, "x", desc="Notify sequencer the load completed.") { @@ -1145,16 +1151,21 @@ machine(L1Cache, "Token protocol") DEBUG_EXPR(getCacheEntry(address).DataBlk); peek(responseNetwork_in, ResponseMsg) { - //sequencer.readCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); - sequencer.readCallback(address, getCacheEntry(address).DataBlk); + sequencer.readCallback(address, + getNondirectHitMachType(address, in_msg.Sender), + getCacheEntry(address).DataBlk); + } } action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") { DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); - //sequencer.writeCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No); - sequencer.writeCallback(address, getCacheEntry(address).DataBlk); + + sequencer.writeCallback(address, + GenericMachineType:L1Cache, + getCacheEntry(address).DataBlk); + getCacheEntry(address).Dirty := true; DEBUG_EXPR(getCacheEntry(address).DataBlk); } @@ -1163,8 +1174,11 @@ machine(L1Cache, "Token protocol") DEBUG_EXPR(address); DEBUG_EXPR(getCacheEntry(address).DataBlk); peek(responseNetwork_in, ResponseMsg) { - //sequencer.writeCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No); - sequencer.writeCallback(address, getCacheEntry(address).DataBlk); + + sequencer.writeCallback(address, + getNondirectHitMachType(address, in_msg.Sender), + getCacheEntry(address).DataBlk); + } getCacheEntry(address).Dirty := true; DEBUG_EXPR(getCacheEntry(address).DataBlk); diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index 325510510..3de72199b 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -114,6 +114,7 @@ machine(L1Cache, "AMD Hammer-like protocol") State CacheState, desc="cache state"; bool Dirty, desc="Is the data dirty (different than memory)?"; DataBlock DataBlk, desc="data for the block"; + bool FromL2, default="false", desc="block just moved from L2"; } // TBE fields @@ -123,6 +124,7 @@ machine(L1Cache, "AMD Hammer-like protocol") bool Dirty, desc="Is the data dirty (different than memory)?"; int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for"; bool Sharers, desc="On a GetS, did we find any other sharers in the system"; + MachineID LastResponder, desc="last machine to send a response for this request"; } external_type(TBETable) { @@ -214,6 +216,26 @@ machine(L1Cache, "AMD Hammer-like protocol") } } + GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) { + if (machineIDToMachineType(sender) == MachineType:L1Cache) { + // + // NOTE direct local hits should not call this + // + return GenericMachineType:L1Cache_wCC; + } else { + return ConvertMachToGenericMach(machineIDToMachineType(sender)); + } + } + + GenericMachineType testAndClearLocalHit(Address addr) { + if (getCacheEntry(addr).FromL2) { + getCacheEntry(addr).FromL2 := false; + return GenericMachineType:L2Cache; + } else { + return GenericMachineType:L1Cache; + } + } + MessageBuffer triggerQueue, ordered="true"; // ** OUT_PORTS ** @@ -487,12 +509,54 @@ machine(L1Cache, "AMD Hammer-like protocol") action(h_load_hit, "h", desc="Notify sequencer the load completed.") { DEBUG_EXPR(getCacheEntry(address).DataBlk); - sequencer.readCallback(address, getCacheEntry(address).DataBlk); + + sequencer.readCallback(address, + testAndClearLocalHit(address), + getCacheEntry(address).DataBlk); + + } + + action(hx_external_load_hit, "hx", desc="load required external msgs") { + DEBUG_EXPR(getCacheEntry(address).DataBlk); + peek(responseToCache_in, ResponseMsg) { + + sequencer.readCallback(address, + getNondirectHitMachType(in_msg.Address, in_msg.Sender), + getCacheEntry(address).DataBlk); + + } } action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") { DEBUG_EXPR(getCacheEntry(address).DataBlk); - sequencer.writeCallback(address, getCacheEntry(address).DataBlk); + + sequencer.writeCallback(address, + testAndClearLocalHit(address), + getCacheEntry(address).DataBlk); + + getCacheEntry(address).Dirty := true; + } + + action(sx_external_store_hit, "sx", desc="store required external msgs.") { + DEBUG_EXPR(getCacheEntry(address).DataBlk); + peek(responseToCache_in, ResponseMsg) { + + sequencer.writeCallback(address, + getNondirectHitMachType(address, in_msg.Sender), + getCacheEntry(address).DataBlk); + + } + getCacheEntry(address).Dirty := true; + } + + action(sxt_trig_ext_store_hit, "sxt", desc="store required external msgs.") { + DEBUG_EXPR(getCacheEntry(address).DataBlk); + + sequencer.writeCallback(address, + getNondirectHitMachType(address, + TBEs[address].LastResponder), + getCacheEntry(address).DataBlk); + getCacheEntry(address).Dirty := true; } @@ -522,6 +586,7 @@ machine(L1Cache, "AMD Hammer-like protocol") DEBUG_EXPR(TBEs[address].NumPendingMsgs); TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - in_msg.Acks; DEBUG_EXPR(TBEs[address].NumPendingMsgs); + TBEs[address].LastResponder := in_msg.Sender; } } @@ -671,9 +736,11 @@ machine(L1Cache, "AMD Hammer-like protocol") if (L1DcacheMemory.isTagPresent(address)) { static_cast(Entry, L1DcacheMemory[address]).Dirty := static_cast(Entry, L2cacheMemory[address]).Dirty; static_cast(Entry, L1DcacheMemory[address]).DataBlk := static_cast(Entry, L2cacheMemory[address]).DataBlk; + static_cast(Entry, L1DcacheMemory[address]).FromL2 := true; } else { static_cast(Entry, L1IcacheMemory[address]).Dirty := static_cast(Entry, L2cacheMemory[address]).Dirty; static_cast(Entry, L1IcacheMemory[address]).DataBlk := static_cast(Entry, L2cacheMemory[address]).DataBlk; + static_cast(Entry, L1IcacheMemory[address]).FromL2 := true; } } @@ -905,7 +972,7 @@ machine(L1Cache, "AMD Hammer-like protocol") u_writeDataToCache; m_decrementNumberOfMessages; o_checkForCompletion; - hh_store_hit; + sx_external_store_hit; n_popResponseQueue; } @@ -941,7 +1008,7 @@ machine(L1Cache, "AMD Hammer-like protocol") } transition(ISM, All_acks_no_sharers, MM) { - hh_store_hit; + sxt_trig_ext_store_hit; g_sendUnblock; s_deallocateTBE; j_popTriggerQueue; @@ -967,7 +1034,7 @@ machine(L1Cache, "AMD Hammer-like protocol") } transition(OM, {All_acks, All_acks_no_sharers}, MM) { - hh_store_hit; + sxt_trig_ext_store_hit; g_sendUnblock; s_deallocateTBE; j_popTriggerQueue; @@ -997,7 +1064,7 @@ machine(L1Cache, "AMD Hammer-like protocol") u_writeDataToCache; m_decrementNumberOfMessages; o_checkForCompletion; - h_load_hit; + hx_external_load_hit; n_popResponseQueue; } @@ -1005,7 +1072,7 @@ machine(L1Cache, "AMD Hammer-like protocol") u_writeDataToCache; m_decrementNumberOfMessages; o_checkForCompletion; - h_load_hit; + hx_external_load_hit; n_popResponseQueue; } @@ -1014,7 +1081,7 @@ machine(L1Cache, "AMD Hammer-like protocol") r_setSharerBit; m_decrementNumberOfMessages; o_checkForCompletion; - h_load_hit; + hx_external_load_hit; n_popResponseQueue; } diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm index 80d0be159..7fc817600 100644 --- a/src/mem/protocol/RubySlicc_Types.sm +++ b/src/mem/protocol/RubySlicc_Types.sm @@ -100,7 +100,9 @@ external_type(NetDest, non_obj="yes") { external_type(Sequencer) { void readCallback(Address, DataBlock); + void readCallback(Address, GenericMachineType, DataBlock); void writeCallback(Address, DataBlock); + void writeCallback(Address, GenericMachineType, DataBlock); void checkCoherence(Address); void profileNack(Address, int, int, uint64); } diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index 2b844ef9d..753fdd230 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -574,23 +574,27 @@ Profiler::bankBusy() // non-zero cycle demand request void -Profiler::missLatency(Time t, RubyRequestType type) +Profiler::missLatency(Time cycles, + RubyRequestType type, + const GenericMachineType respondingMach) { - m_allMissLatencyHistogram.add(t); - m_missLatencyHistograms[type].add(t); + m_allMissLatencyHistogram.add(cycles); + m_missLatencyHistograms[type].add(cycles); + m_machLatencyHistograms[respondingMach].add(cycles); } // non-zero cycle prefetch request void -Profiler::swPrefetchLatency(Time t, CacheRequestType type, - GenericMachineType respondingMach) +Profiler::swPrefetchLatency(Time cycles, + CacheRequestType type, + const GenericMachineType respondingMach) { - m_allSWPrefetchLatencyHistogram.add(t); - m_SWPrefetchLatencyHistograms[type].add(t); - m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); + m_allSWPrefetchLatencyHistogram.add(cycles); + m_SWPrefetchLatencyHistograms[type].add(cycles); + m_SWPrefetchMachLatencyHistograms[respondingMach].add(cycles); if (respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) { - m_SWPrefetchL2MissLatencyHistogram.add(t); + m_SWPrefetchL2MissLatencyHistogram.add(cycles); } } diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index 20491cab7..de9834f05 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -133,9 +133,15 @@ class Profiler : public SimObject, public Consumer void controllerBusy(MachineID machID); void bankBusy(); - void missLatency(Time t, RubyRequestType type); - void swPrefetchLatency(Time t, CacheRequestType type, - GenericMachineType respondingMach); + + void missLatency(Time t, + RubyRequestType type, + const GenericMachineType respondingMach); + + void swPrefetchLatency(Time t, + CacheRequestType type, + const GenericMachineType respondingMach); + void sequencerRequests(int num) { m_sequencer_requests.add(num); } void profileTransition(const std::string& component, NodeID version, diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 19bcb4b1c..e4f85908f 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -305,6 +305,14 @@ Sequencer::removeRequest(SequencerRequest* srequest) void Sequencer::writeCallback(const Address& address, DataBlock& data) { + writeCallback(address, GenericMachineType_NULL, data); +} + +void +Sequencer::writeCallback(const Address& address, + GenericMachineType mach, + DataBlock& data) +{ assert(address == line_address(address)); assert(m_writeRequestTable.count(line_address(address))); @@ -329,12 +337,20 @@ Sequencer::writeCallback(const Address& address, DataBlock& data) m_controller->unblock(address); } - hitCallback(request, data); + hitCallback(request, mach, data); } void Sequencer::readCallback(const Address& address, DataBlock& data) { + readCallback(address, GenericMachineType_NULL, data); +} + +void +Sequencer::readCallback(const Address& address, + GenericMachineType mach, + DataBlock& data) +{ assert(address == line_address(address)); assert(m_readRequestTable.count(line_address(address))); @@ -349,11 +365,13 @@ Sequencer::readCallback(const Address& address, DataBlock& data) (request->ruby_request.type == RubyRequestType_RMW_Read) || (request->ruby_request.type == RubyRequestType_IFETCH)); - hitCallback(request, data); + hitCallback(request, mach, data); } void -Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data) +Sequencer::hitCallback(SequencerRequest* srequest, + GenericMachineType mach, + DataBlock& data) { const RubyRequest & ruby_request = srequest->ruby_request; Address request_address(ruby_request.paddr); @@ -376,7 +394,7 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data) // Profile the miss latency for all non-zero demand misses if (miss_latency != 0) { - g_system_ptr->getProfiler()->missLatency(miss_latency, type); + g_system_ptr->getProfiler()->missLatency(miss_latency, type, mach); if (Debug::getProtocolTrace()) { g_system_ptr->getProfiler()-> diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index a336751fd..fd6b390c2 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -75,8 +75,17 @@ class Sequencer : public RubyPort, public Consumer void printProgress(std::ostream& out) const; void writeCallback(const Address& address, DataBlock& data); + + void writeCallback(const Address& address, + GenericMachineType mach, + DataBlock& data); + void readCallback(const Address& address, DataBlock& data); + void readCallback(const Address& address, + GenericMachineType mach, + DataBlock& data); + RequestStatus makeRequest(const RubyRequest & request); RequestStatus getRequestStatus(const RubyRequest& request); bool empty() const; @@ -94,7 +103,10 @@ class Sequencer : public RubyPort, public Consumer int size, DataBlock*& data_ptr); void issueRequest(const RubyRequest& request); - void hitCallback(SequencerRequest* request, DataBlock& data); + void hitCallback(SequencerRequest* request, + GenericMachineType mach, + DataBlock& data); + bool insertRequest(SequencerRequest* request); |