diff options
Diffstat (limited to 'src/mem/ruby')
-rw-r--r-- | src/mem/ruby/libruby.hh | 3 | ||||
-rw-r--r-- | src/mem/ruby/profiler/AddressProfiler.cc | 29 | ||||
-rw-r--r-- | src/mem/ruby/profiler/AddressProfiler.hh | 3 | ||||
-rw-r--r-- | src/mem/ruby/profiler/Profiler.cc | 218 | ||||
-rw-r--r-- | src/mem/ruby/profiler/Profiler.hh | 37 | ||||
-rw-r--r-- | src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc | 25 | ||||
-rw-r--r-- | src/mem/ruby/system/CacheMemory.hh | 6 | ||||
-rw-r--r-- | src/mem/ruby/system/DMASequencer.cc | 1 | ||||
-rw-r--r-- | src/mem/ruby/system/System.cc | 15 | ||||
-rw-r--r-- | src/mem/ruby/system/TBETable.hh | 1 |
10 files changed, 25 insertions, 313 deletions
diff --git a/src/mem/ruby/libruby.hh b/src/mem/ruby/libruby.hh index 94018e9b9..3b6e19c41 100644 --- a/src/mem/ruby/libruby.hh +++ b/src/mem/ruby/libruby.hh @@ -14,7 +14,8 @@ enum RubyRequestType { RubyRequestType_Locked_Read, RubyRequestType_Locked_Write, RubyRequestType_RMW_Read, - RubyRequestType_RMW_Write + RubyRequestType_RMW_Write, + RubyRequestType_NUM }; enum RubyAccessMode { diff --git a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc index 9ff10dc90..9c9445de3 100644 --- a/src/mem/ruby/profiler/AddressProfiler.cc +++ b/src/mem/ruby/profiler/AddressProfiler.cc @@ -54,7 +54,6 @@ AddressProfiler::AddressProfiler() m_macroBlockAccessTrace = new Map<Address, AccessTraceForAddress>; m_programCounterAccessTrace = new Map<Address, AccessTraceForAddress>; m_retryProfileMap = new Map<Address, AccessTraceForAddress>; - m_persistentPredictionProfileMap = new Map<Address, AccessTraceForAddress>; clearStats(); } @@ -64,7 +63,6 @@ AddressProfiler::~AddressProfiler() delete m_macroBlockAccessTrace; delete m_programCounterAccessTrace; delete m_retryProfileMap; - delete m_persistentPredictionProfileMap; } void AddressProfiler::setHotLines(bool hot_lines){ @@ -125,31 +123,10 @@ void AddressProfiler::printStats(ostream& out) const m_retryProfileHisto.printPercent(out); out << endl; - out << "retry_histogram_per_instruction: "; - m_retryProfileHisto.printWithMultiplier(out, 1.0 / double(g_system_ptr->getProfiler()->getTotalInstructionsExecuted())); - out << endl; - printSorted(out, m_retryProfileMap, "block_address"); out << endl; } - if (m_persistentPredictionProfileHisto.size() > 0) { - out << "Persistent Prediction Profile" << endl; - out << "-------------" << endl; - out << endl; - out << "persistent prediction_histogram: " << m_persistentPredictionProfileHisto << endl; - - out << "persistent prediction_histogram_percent: "; - m_persistentPredictionProfileHisto.printPercent(out); - out << endl; - - out << "persistentPrediction_histogram_per_instruction: "; - m_persistentPredictionProfileHisto.printWithMultiplier(out, 1.0 / double(g_system_ptr->getProfiler()->getTotalInstructionsExecuted())); - out << endl; - - printSorted(out, m_persistentPredictionProfileMap, "block_address"); - out << endl; - } } void AddressProfiler::clearStats() @@ -233,12 +210,6 @@ void AddressProfiler::profileRetry(const Address& data_addr, AccessType type, in } } -void AddressProfiler::profilePersistentPrediction(const Address& data_addr, AccessType type) -{ - m_persistentPredictionProfileHisto.add(1); - lookupTraceForAddress(data_addr, m_persistentPredictionProfileMap).addSample(1); -} - // ***** Normal Functions ****** static void printSorted(ostream& out, const Map<Address, AccessTraceForAddress>* record_map, string description) diff --git a/src/mem/ruby/profiler/AddressProfiler.hh b/src/mem/ruby/profiler/AddressProfiler.hh index 69bf1319a..24ee2af05 100644 --- a/src/mem/ruby/profiler/AddressProfiler.hh +++ b/src/mem/ruby/profiler/AddressProfiler.hh @@ -63,7 +63,6 @@ public: void clearStats(); void addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss); - void profilePersistentPrediction(const Address& data_addr, AccessType type); void profileRetry(const Address& data_addr, AccessType type, int count); void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); void profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); @@ -87,8 +86,6 @@ private: Map<Address, AccessTraceForAddress>* m_macroBlockAccessTrace; Map<Address, AccessTraceForAddress>* m_programCounterAccessTrace; Map<Address, AccessTraceForAddress>* m_retryProfileMap; - Map<Address, AccessTraceForAddress>* m_persistentPredictionProfileMap; - Histogram m_persistentPredictionProfileHisto; Histogram m_retryProfileHisto; Histogram m_retryProfileHistoWrite; Histogram m_retryProfileHistoRead; diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index e8aa7edf9..d5c47825f 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -52,7 +52,6 @@ */ #include "mem/ruby/profiler/Profiler.hh" -#include "mem/ruby/profiler/CacheProfiler.hh" #include "mem/ruby/profiler/AddressProfiler.hh" #include "mem/ruby/system/System.hh" #include "mem/ruby/network/Network.hh" @@ -73,27 +72,13 @@ static double process_memory_total(); static double process_memory_resident(); Profiler::Profiler(const string & name) - : m_conflicting_histogram(-1) { m_name = name; m_requestProfileMap_ptr = new Map<string, int>; - m_L1D_cache_profiler_ptr = new CacheProfiler("L1D_cache"); - m_L1I_cache_profiler_ptr = new CacheProfiler("L1I_cache"); - - m_L2_cache_profiler_ptr = new CacheProfiler("L2_cache"); m_inst_profiler_ptr = NULL; m_address_profiler_ptr = NULL; -/* - m_address_profiler_ptr = new AddressProfiler; - m_inst_profiler_ptr = NULL; - if (m_all_instructions) { - m_inst_profiler_ptr = new AddressProfiler; - } -*/ - m_conflicting_map_ptr = new Map<Address, Time>; - m_real_time_start_time = time(NULL); // Not reset in clearStats() m_stats_period = 1000000; // Default m_periodic_output_file_ptr = &cerr; @@ -105,12 +90,7 @@ Profiler::~Profiler() if (m_periodic_output_file_ptr != &cerr) { delete m_periodic_output_file_ptr; } - delete m_address_profiler_ptr; - delete m_L1D_cache_profiler_ptr; - delete m_L1I_cache_profiler_ptr; - delete m_L2_cache_profiler_ptr; delete m_requestProfileMap_ptr; - delete m_conflicting_map_ptr; } void Profiler::init(const vector<string> & argv, vector<string> memory_control_names) @@ -182,32 +162,24 @@ void Profiler::wakeup() { // FIXME - avoid the repeated code - Vector<integer_t> perProcInstructionCount; - perProcInstructionCount.setSize(RubySystem::getNumberOfSequencers()); - Vector<integer_t> perProcCycleCount; perProcCycleCount.setSize(RubySystem::getNumberOfSequencers()); for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) { - perProcInstructionCount[i] = g_system_ptr->getInstructionCount(i) - m_instructions_executed_at_start[i] + 1; perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; // The +1 allows us to avoid division by zero } integer_t total_misses = m_perProcTotalMisses.sum(); - integer_t instruction_executed = perProcInstructionCount.sum(); integer_t simics_cycles_executed = perProcCycleCount.sum(); integer_t transactions_started = m_perProcStartTransaction.sum(); integer_t transactions_ended = m_perProcEndTransaction.sum(); (*m_periodic_output_file_ptr) << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl; (*m_periodic_output_file_ptr) << "total_misses: " << total_misses << " " << m_perProcTotalMisses << endl; - (*m_periodic_output_file_ptr) << "instruction_executed: " << instruction_executed << " " << perProcInstructionCount << endl; (*m_periodic_output_file_ptr) << "simics_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl; (*m_periodic_output_file_ptr) << "transactions_started: " << transactions_started << " " << m_perProcStartTransaction << endl; (*m_periodic_output_file_ptr) << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl; - (*m_periodic_output_file_ptr) << "L1TBE_usage: " << m_L1tbeProfile << endl; - (*m_periodic_output_file_ptr) << "L2TBE_usage: " << m_L2tbeProfile << endl; (*m_periodic_output_file_ptr) << "mbytes_resident: " << process_memory_resident() << endl; (*m_periodic_output_file_ptr) << "mbytes_total: " << process_memory_total() << endl; if (process_memory_total() > 0) { @@ -292,7 +264,7 @@ void Profiler::printStats(ostream& out, bool short_stats) out << "Virtual_time_in_seconds: " << seconds << endl; out << "Virtual_time_in_minutes: " << minutes << endl; out << "Virtual_time_in_hours: " << hours << endl; - out << "Virtual_time_in_days: " << hours << endl; + out << "Virtual_time_in_days: " << days << endl; out << endl; out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; @@ -308,43 +280,26 @@ void Profiler::printStats(ostream& out, bool short_stats) } out << endl; - if(m_num_BA_broadcasts + m_num_BA_unicasts != 0){ - out << endl; - out << "Broadcast_percent: " << (float)m_num_BA_broadcasts/(m_num_BA_broadcasts+m_num_BA_unicasts) << endl; - } } - Vector<integer_t> perProcInstructionCount; Vector<integer_t> perProcCycleCount; - Vector<double> perProcCPI; - Vector<double> perProcMissesPerInsn; - Vector<double> perProcInsnPerTrans; Vector<double> perProcCyclesPerTrans; Vector<double> perProcMissesPerTrans; - perProcInstructionCount.setSize(RubySystem::getNumberOfSequencers()); - perProcCycleCount.setSize(RubySystem::getNumberOfSequencers()); - perProcCPI.setSize(RubySystem::getNumberOfSequencers()); - perProcMissesPerInsn.setSize(RubySystem::getNumberOfSequencers()); - perProcInsnPerTrans.setSize(RubySystem::getNumberOfSequencers()); + perProcCycleCount.setSize(RubySystem::getNumberOfSequencers()); perProcCyclesPerTrans.setSize(RubySystem::getNumberOfSequencers()); perProcMissesPerTrans.setSize(RubySystem::getNumberOfSequencers()); for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) { - perProcInstructionCount[i] = g_system_ptr->getInstructionCount(i) - m_instructions_executed_at_start[i] + 1; perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; // The +1 allows us to avoid division by zero - perProcCPI[i] = double(ruby_cycles)/perProcInstructionCount[i]; - perProcMissesPerInsn[i] = 1000.0 * (double(m_perProcTotalMisses[i]) / double(perProcInstructionCount[i])); int trans = m_perProcEndTransaction[i]; if (trans == 0) { - perProcInsnPerTrans[i] = 0; perProcCyclesPerTrans[i] = 0; perProcMissesPerTrans[i] = 0; } else { - perProcInsnPerTrans[i] = perProcInstructionCount[i] / double(trans); perProcCyclesPerTrans[i] = ruby_cycles / double(trans); perProcMissesPerTrans[i] = m_perProcTotalMisses[i] / double(trans); } @@ -353,12 +308,10 @@ void Profiler::printStats(ostream& out, bool short_stats) integer_t total_misses = m_perProcTotalMisses.sum(); integer_t user_misses = m_perProcUserMisses.sum(); integer_t supervisor_misses = m_perProcSupervisorMisses.sum(); - integer_t instruction_executed = perProcInstructionCount.sum(); integer_t simics_cycles_executed = perProcCycleCount.sum(); integer_t transactions_started = m_perProcStartTransaction.sum(); integer_t transactions_ended = m_perProcEndTransaction.sum(); - double instructions_per_transaction = (transactions_ended != 0) ? double(instruction_executed) / double(transactions_ended) : 0; double cycles_per_transaction = (transactions_ended != 0) ? (RubySystem::getNumberOfSequencers() * double(ruby_cycles)) / double(transactions_ended) : 0; double misses_per_transaction = (transactions_ended != 0) ? double(total_misses) / double(transactions_ended) : 0; @@ -367,23 +320,15 @@ void Profiler::printStats(ostream& out, bool short_stats) out << "user_misses: " << user_misses << " " << m_perProcUserMisses << endl; out << "supervisor_misses: " << supervisor_misses << " " << m_perProcSupervisorMisses << endl; out << endl; - out << "instruction_executed: " << instruction_executed << " " << perProcInstructionCount << endl; out << "ruby_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl; - out << "cycles_per_instruction: " << (RubySystem::getNumberOfSequencers()*double(ruby_cycles))/double(instruction_executed) << " " << perProcCPI << endl; - out << "misses_per_thousand_instructions: " << 1000.0 * (double(total_misses) / double(instruction_executed)) << " " << perProcMissesPerInsn << endl; out << endl; out << "transactions_started: " << transactions_started << " " << m_perProcStartTransaction << endl; out << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl; - out << "instructions_per_transaction: " << instructions_per_transaction << " " << perProcInsnPerTrans << endl; out << "cycles_per_transaction: " << cycles_per_transaction << " " << perProcCyclesPerTrans << endl; out << "misses_per_transaction: " << misses_per_transaction << " " << perProcMissesPerTrans << endl; out << endl; - // m_L1D_cache_profiler_ptr->printStats(out); - // m_L1I_cache_profiler_ptr->printStats(out); - // m_L2_cache_profiler_ptr->printStats(out); - out << endl; vector<string>::iterator it; @@ -409,7 +354,7 @@ void Profiler::printStats(ostream& out, bool short_stats) if (m_memReq || m_memRefresh) { // if there's a memory controller at all long long int total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; double stallsPerReq = total_stalls * 1.0 / m_memReq; - out << "Memory control:" << endl; + out << "Memory control " << (*it) << ":" << endl; out << " memory_total_requests: " << m_memReq << endl; // does not include refreshes out << " memory_reads: " << m_memRead << endl; out << " memory_writes: " << m_memWrite << endl; @@ -463,12 +408,7 @@ void Profiler::printStats(ostream& out, bool short_stats) out << "Busy Bank Count:" << m_busyBankCount << endl; out << endl; - out << "L1TBE_usage: " << m_L1tbeProfile << endl; - out << "L2TBE_usage: " << m_L2tbeProfile << endl; - out << "StopTable_usage: " << m_stopTableProfile << endl; out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl; - out << "store_buffer_size: " << m_store_buffer_size << endl; - out << "unique_blocks_in_store_buffer: " << m_store_buffer_blocks << endl; out << endl; } @@ -486,7 +426,6 @@ void Profiler::printStats(ostream& out, bool short_stats) out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl; } } - out << "miss_latency_L2Miss: " << m_L2MissLatencyHistogram << endl; out << endl; @@ -505,12 +444,6 @@ void Profiler::printStats(ostream& out, bool short_stats) } out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl; - out << "multicast_retries: " << m_multicast_retry_histogram << endl; - out << "gets_mask_prediction_count: " << m_gets_mask_prediction << endl; - out << "getx_mask_prediction_count: " << m_getx_mask_prediction << endl; - out << "explicit_training_mask: " << m_explicit_training_mask << endl; - out << endl; - if (m_all_sharing_histogram.size() > 0) { out << "all_sharing: " << m_all_sharing_histogram << endl; out << "read_sharing: " << m_read_sharing_histogram << endl; @@ -529,17 +462,8 @@ void Profiler::printStats(ostream& out, bool short_stats) out << endl; } - if (m_conflicting_histogram.size() > 0) { - out << "conflicting_histogram: " << m_conflicting_histogram << endl; - out << "conflicting_histogram_percent: "; m_conflicting_histogram.printPercent(out); out << endl; - out << endl; - } - if (m_outstanding_requests.size() > 0) { out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl; - if (m_outstanding_persistent_requests.size() > 0) { - out << "outstanding_persistent_requests: "; m_outstanding_persistent_requests.printPercent(out); out << endl; - } out << endl; } } @@ -610,19 +534,13 @@ void Profiler::printResourceUsage(ostream& out) const void Profiler::clearStats() { - m_num_BA_unicasts = 0; - m_num_BA_broadcasts = 0; - m_ruby_start = g_eventQueue_ptr->getTime(); - m_instructions_executed_at_start.setSize(RubySystem::getNumberOfSequencers()); m_cycles_executed_at_start.setSize(RubySystem::getNumberOfSequencers()); for (int i=0; i < RubySystem::getNumberOfSequencers(); i++) { if (g_system_ptr == NULL) { - m_instructions_executed_at_start[i] = 0; m_cycles_executed_at_start[i] = 0; } else { - m_instructions_executed_at_start[i] = g_system_ptr->getInstructionCount(i); m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i); } } @@ -657,11 +575,7 @@ void Profiler::clearStats() m_delayedCyclesVCHistograms[i].clear(); } - m_gets_mask_prediction.clear(); - m_getx_mask_prediction.clear(); - m_explicit_training_mask.clear(); - - m_missLatencyHistograms.setSize(CacheRequestType_NUM); + m_missLatencyHistograms.setSize(RubyRequestType_NUM); for(int i=0; i<m_missLatencyHistograms.size(); i++) { m_missLatencyHistograms[i].clear(200); } @@ -670,7 +584,6 @@ void Profiler::clearStats() m_machLatencyHistograms[i].clear(200); } m_allMissLatencyHistogram.clear(200); - m_L2MissLatencyHistogram.clear(200); m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM); for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) { @@ -681,67 +594,23 @@ void Profiler::clearStats() m_SWPrefetchMachLatencyHistograms[i].clear(200); } m_allSWPrefetchLatencyHistogram.clear(200); - m_SWPrefetchL2MissLatencyHistogram.clear(200); - - m_multicast_retry_histogram.clear(); - - m_L1tbeProfile.clear(); - m_L2tbeProfile.clear(); - m_stopTableProfile.clear(); - m_filter_action_histogram.clear(); m_sequencer_requests.clear(); - m_store_buffer_size.clear(); - m_store_buffer_blocks.clear(); m_read_sharing_histogram.clear(); m_write_sharing_histogram.clear(); m_all_sharing_histogram.clear(); m_cache_to_cache = 0; m_memory_to_cache = 0; - m_predictions = 0; - m_predictionOpportunities = 0; - m_goodPredictions = 0; - // clear HashMaps m_requestProfileMap_ptr->clear(); // count requests profiled m_requests = 0; - // Conflicting requests - m_conflicting_map_ptr->clear(); - m_conflicting_histogram.clear(); - m_outstanding_requests.clear(); m_outstanding_persistent_requests.clear(); - m_L1D_cache_profiler_ptr->clearStats(); - m_L1I_cache_profiler_ptr->clearStats(); - m_L2_cache_profiler_ptr->clearStats(); - - // for MemoryControl: -/* - m_memReq = 0; - m_memBankBusy = 0; - m_memBusBusy = 0; - m_memTfawBusy = 0; - m_memReadWriteBusy = 0; - m_memDataBusBusy = 0; - m_memRefresh = 0; - m_memRead = 0; - m_memWrite = 0; - m_memWaitCycles = 0; - m_memInputQ = 0; - m_memBankQ = 0; - m_memArbWait = 0; - m_memRandBusy = 0; - m_memNotOld = 0; - - for (int bank=0; bank < m_memBankCount.size(); bank++) { - m_memBankCount[bank] = 0; - } -*/ //added by SS vector<string>::iterator it; @@ -773,19 +642,6 @@ void Profiler::clearStats() m_ruby_start = g_eventQueue_ptr->getTime(); } -void Profiler::profileConflictingRequests(const Address& addr) -{ - assert(addr == line_address(addr)); - Time last_time = m_ruby_start; - if (m_conflicting_map_ptr->exist(addr)) { - last_time = m_conflicting_map_ptr->lookup(addr); - } - Time current_time = g_eventQueue_ptr->getTime(); - assert (current_time - last_time > 0); - m_conflicting_histogram.add(current_time - last_time); - m_conflicting_map_ptr->add(addr, current_time); -} - void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) { if (msg.getType() != CacheRequestType_IFETCH) { @@ -843,27 +699,6 @@ void Profiler::profileRequest(const string& requestStr) } } -void Profiler::recordPrediction(bool wasGood, bool wasPredicted) -{ - m_predictionOpportunities++; - if(wasPredicted){ - m_predictions++; - if(wasGood){ - m_goodPredictions++; - } - } -} - -void Profiler::profileFilterAction(int action) -{ - m_filter_action_histogram.add(action); -} - -void Profiler::profileMulticastRetry(const Address& addr, int count) -{ - m_multicast_retry_histogram.add(count); -} - void Profiler::startTransaction(int cpu) { m_perProcStartTransaction[cpu]++; @@ -894,12 +729,6 @@ void Profiler::missLatency(Time t, RubyRequestType type) { m_allMissLatencyHistogram.add(t); m_missLatencyHistograms[type].add(t); - /* - m_machLatencyHistograms[respondingMach].add(t); - if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) { - m_L2MissLatencyHistogram.add(t); - } - */ } // non-zero cycle prefetch request @@ -968,41 +797,6 @@ static double process_memory_resident() return double(res_size_in_pages)*MULTIPLIER; // size in megabytes } -void Profiler::profileGetXMaskPrediction(const Set& pred_set) -{ - m_getx_mask_prediction.add(pred_set.count()); -} - -void Profiler::profileGetSMaskPrediction(const Set& pred_set) -{ - m_gets_mask_prediction.add(pred_set.count()); -} - -void Profiler::profileTrainingMask(const Set& pred_set) -{ - m_explicit_training_mask.add(pred_set.count()); -} - -int64 Profiler::getTotalInstructionsExecuted() const -{ - int64 sum = 1; // Starting at 1 allows us to avoid division by zero - for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) { - sum += (g_system_ptr->getInstructionCount(i) - m_instructions_executed_at_start[i]); - } - return sum; -} - -int64 Profiler::getTotalTransactionsExecuted() const -{ - int64 sum = m_perProcEndTransaction.sum(); - if (sum > 0) { - return sum; - } else { - return 1; // Avoid division by zero errors - } -} - - void Profiler::rubyWatch(int id){ //int rn_g1 = 0;//SIMICS_get_register_number(id, "g1"); uint64 tr = 0;//SIMICS_read_register(id, rn_g1); @@ -1029,6 +823,10 @@ bool Profiler::watchAddress(Address addr){ return false; } +int64 Profiler::getTotalTransactionsExecuted() const { + return m_perProcEndTransaction.sum(); +} + // For MemoryControl: void Profiler::profileMemReq(string name, int bank) { // printf("name is %s", name.c_str()); diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index 4549e3ea7..673051db3 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -72,7 +72,6 @@ #include "mem/ruby/system/MemoryControl.hh" class CacheMsg; -class CacheProfiler; class AddressProfiler; template <class KEY_TYPE, class VALUE_TYPE> class Map; @@ -140,9 +139,6 @@ public: void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); } void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); } - void countBAUnicast() { m_num_BA_unicasts++; } - void countBABroadcast() { m_num_BA_broadcasts++; } - void recordPrediction(bool wasGood, bool wasPredicted); void startTransaction(int cpu); @@ -153,15 +149,8 @@ public: void bankBusy(); void missLatency(Time t, RubyRequestType type); void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); - void stopTableUsageSample(int num) { m_stopTableProfile.add(num); } - void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); } - void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); } void sequencerRequests(int num) { m_sequencer_requests.add(num); } - void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);} - void profileGetXMaskPrediction(const Set& pred_set); - void profileGetSMaskPrediction(const Set& pred_set); - void profileTrainingMask(const Set& pred_set); void profileTransition(const string& component, NodeID version, Address addr, const string& state, const string& event, const string& next_state, const string& note); @@ -169,7 +158,6 @@ public: void print(ostream& out) const; - int64 getTotalInstructionsExecuted() const; int64 getTotalTransactionsExecuted() const; void rubyWatch(int proc); @@ -209,9 +197,6 @@ private: Profiler& operator=(const Profiler& obj); // Data Members (m_ prefix) - CacheProfiler* m_L1D_cache_profiler_ptr; - CacheProfiler* m_L1I_cache_profiler_ptr; - CacheProfiler* m_L2_cache_profiler_ptr; AddressProfiler* m_address_profiler_ptr; AddressProfiler* m_inst_profiler_ptr; @@ -224,9 +209,6 @@ private: Time m_ruby_start; time_t m_real_time_start_time; - int m_num_BA_unicasts; - int m_num_BA_broadcasts; - Vector<integer_t> m_perProcTotalMisses; Vector<integer_t> m_perProcUserMisses; Vector<integer_t> m_perProcSupervisorMisses; @@ -236,16 +218,10 @@ private: integer_t m_busyBankCount; Histogram m_multicast_retry_histogram; - Histogram m_L1tbeProfile; - Histogram m_L2tbeProfile; - Histogram m_stopTableProfile; - Histogram m_filter_action_histogram; Histogram m_tbeProfile; Histogram m_sequencer_requests; - Histogram m_store_buffer_size; - Histogram m_store_buffer_blocks; Histogram m_read_sharing_histogram; Histogram m_write_sharing_histogram; Histogram m_all_sharing_histogram; @@ -256,7 +232,6 @@ private: Vector<Histogram> m_missLatencyHistograms; Vector<Histogram> m_machLatencyHistograms; - Histogram m_L2MissLatencyHistogram; Histogram m_allMissLatencyHistogram; Histogram m_allSWPrefetchLatencyHistogram; @@ -268,18 +243,6 @@ private: Histogram m_delayedCyclesNonPFHistogram; Vector<Histogram> m_delayedCyclesVCHistograms; - int m_predictions; - int m_predictionOpportunities; - int m_goodPredictions; - - Histogram m_gets_mask_prediction; - Histogram m_getx_mask_prediction; - Histogram m_explicit_training_mask; - - // For profiling possibly conflicting requests - Map<Address, Time>* m_conflicting_map_ptr; - Histogram m_conflicting_histogram; - Histogram m_outstanding_requests; Histogram m_outstanding_persistent_requests; diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc index 883edd3c8..54d38c187 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc +++ b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc @@ -64,11 +64,6 @@ void profile_outstanding_request(int outstanding) g_system_ptr->getProfiler()->profileOutstandingRequest(outstanding); } -void profile_outstanding_persistent_request(int outstanding) -{ - g_system_ptr->getProfiler()->profileOutstandingPersistentRequest(outstanding); -} - void profile_average_latency_estimate(int latency) { g_system_ptr->getProfiler()->profileAverageLatencyEstimate(latency); @@ -84,26 +79,6 @@ void profileMsgDelay(int virtualNetwork, int delayCycles) g_system_ptr->getProfiler()->profileMsgDelay(virtualNetwork, delayCycles); } -void profile_token_retry(const Address& addr, AccessType type, int count) -{ - g_system_ptr->getProfiler()->getAddressProfiler()->profileRetry(addr, type, count); -} - -void profile_filter_action(int action) -{ - g_system_ptr->getProfiler()->profileFilterAction(action); -} - -void profile_persistent_prediction(const Address& addr, AccessType type) -{ - g_system_ptr->getProfiler()->getAddressProfiler()->profilePersistentPrediction(addr, type); -} - -void profile_multicast_retry(const Address& addr, int count) -{ - g_system_ptr->getProfiler()->profileMulticastRetry(addr, count); -} - void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) { g_system_ptr->getProfiler()->getAddressProfiler()->profileGetX(datablock, PC, owner, sharers, requestor); diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh index cfaa229a5..7a46bd3a5 100644 --- a/src/mem/ruby/system/CacheMemory.hh +++ b/src/mem/ruby/system/CacheMemory.hh @@ -127,6 +127,7 @@ public: void print(ostream& out) const; void printData(ostream& out) const; + void clearStats() const; void printStats(ostream& out) const; private: @@ -561,6 +562,11 @@ void CacheMemory::printData(ostream& out) const out << "printData() not supported" << endl; } +inline void CacheMemory::clearStats() const +{ + m_profiler_ptr->clearStats(); +} + inline void CacheMemory::printStats(ostream& out) const { diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc index 58ec7bb45..d29dba602 100644 --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ -51,6 +51,7 @@ int64_t DMASequencer::makeRequest(const RubyRequest & request) case RubyRequestType_Locked_Write: case RubyRequestType_RMW_Read: case RubyRequestType_RMW_Write: + case RubyRequestType_NUM: assert(0); } diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc index 2c24c9ade..ad67cdc80 100644 --- a/src/mem/ruby/system/System.cc +++ b/src/mem/ruby/system/System.cc @@ -347,15 +347,16 @@ void RubySystem::printStats(ostream& out) void RubySystem::clearStats() const { - /* m_profiler_ptr->clearStats(); - for (int i=0; i<m_rubyRequestQueues.size(); i++) - for (int j=0;j<m_rubyRequestQueues[i].size(); j++) - m_rubyRequestQueues[i][j]->clearStats(); m_network_ptr->clearStats(); - for (int i=0; i < MachineType_base_level(MachineType_NUM); i++) - m_controllers[i][0]->clearStats(); - */ + for (map<string, CacheMemory*>::const_iterator it = m_caches.begin(); + it != m_caches.end(); it++) { + (*it).second->clearStats(); + } + for (map<string, AbstractController*>::const_iterator it = m_controllers.begin(); + it != m_controllers.end(); it++) { + (*it).second->clearStats(); + } } void RubySystem::recordCacheContents(CacheRecorder& tr) const diff --git a/src/mem/ruby/system/TBETable.hh b/src/mem/ruby/system/TBETable.hh index 7d2daa55a..2b00f7a06 100644 --- a/src/mem/ruby/system/TBETable.hh +++ b/src/mem/ruby/system/TBETable.hh @@ -128,7 +128,6 @@ void TBETable<ENTRY>::allocate(const Address& address) { assert(isPresent(address) == false); assert(m_map.size() < m_number_of_TBEs); - g_system_ptr->getProfiler()->L2tbeUsageSample(m_map.size()); m_map.add(address, ENTRY()); } |