diff options
Diffstat (limited to 'src/mem/ruby/profiler')
-rw-r--r-- | src/mem/ruby/profiler/Profiler.cc | 1319 | ||||
-rw-r--r-- | src/mem/ruby/profiler/Profiler.hh | 593 |
2 files changed, 258 insertions, 1654 deletions
diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index 9bb4b6b4c..456123a68 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -64,20 +64,11 @@ #include "Map.hh" #include "Debug.hh" #include "MachineType.hh" -// #include "TransactionInterfaceManager.hh" -#include "interface.hh" -//#include "XactVisualizer.hh" //gem5:Arka for decomissioning log_tm -//#include "XactProfiler.hh" //gem5:Arka for decomissioning log_tm - -// extern "C" { -// #include "Rock.hh" -// } // Allows use of times() library call, which determines virtual runtime #include <sys/times.h> extern std::ostream * debug_cout_ptr; -extern std::ostream * xact_cout_ptr; static double process_memory_total(); static double process_memory_resident(); @@ -97,42 +88,11 @@ Profiler::Profiler() m_inst_profiler_ptr = new AddressProfiler; } - //m_xact_profiler_ptr = new XactProfiler; //gem5:Arka for decomissioning og log_tm - m_conflicting_map_ptr = new Map<Address, Time>; m_real_time_start_time = time(NULL); // Not reset in clearStats() m_stats_period = 1000000; // Default m_periodic_output_file_ptr = &cerr; - m_xact_visualizer_ptr = &cout; - - //---- begin XACT_MEM code - m_xactExceptionMap_ptr = new Map<int, int>; - m_procsInXactMap_ptr = new Map<int, int>; - m_abortIDMap_ptr = new Map<int, int>; - m_commitIDMap_ptr = new Map<int, int>; - m_xactRetryIDMap_ptr = new Map<int, int>; - m_xactCyclesIDMap_ptr = new Map<int, int>; - m_xactReadSetIDMap_ptr = new Map<int, int>; - m_xactWriteSetIDMap_ptr = new Map<int, int>; - m_xactLoadMissIDMap_ptr = new Map<int, int>; - m_xactStoreMissIDMap_ptr = new Map<int, int>; - m_xactInstrCountIDMap_ptr = new Map<int, integer_t>; - m_abortPCMap_ptr = new Map<Address, int>; - m_abortAddressMap_ptr = new Map<Address, int>; - m_nackXIDMap_ptr = new Map<int, int>; - m_nackXIDPairMap_ptr = new Map<int, Map<int, int> * >; - m_nackPCMap_ptr = new Map<Address, int>; - m_watch_address_list_ptr = new Map<Address, int>; - m_readSetMatch_ptr = new Map<Address, int>; - m_readSetNoMatch_ptr = new Map<Address, int>; - m_writeSetMatch_ptr = new Map<Address, int>; - m_writeSetNoMatch_ptr = new Map<Address, int>; - m_xactReadFilterBitsSetOnCommit = new Map<int, Histogram>; - m_xactReadFilterBitsSetOnAbort = new Map<int, Histogram>; - m_xactWriteFilterBitsSetOnCommit = new Map<int, Histogram>; - m_xactWriteFilterBitsSetOnAbort = new Map<int, Histogram>; - //---- end XACT_MEM code // for MemoryControl: m_memReq = 0; @@ -169,7 +129,6 @@ Profiler::~Profiler() delete m_L1D_cache_profiler_ptr; delete m_L1I_cache_profiler_ptr; delete m_L2_cache_profiler_ptr; - //delete m_xact_profiler_ptr; //gem5:Arka for decomissioning of log_tm delete m_requestProfileMap_ptr; delete m_conflicting_map_ptr; } @@ -192,14 +151,14 @@ void Profiler::wakeup() integer_t total_misses = m_perProcTotalMisses.sum(); integer_t instruction_executed = perProcInstructionCount.sum(); - integer_t simics_cycles_executed = perProcCycleCount.sum(); + integer_t cycles_executed = perProcCycleCount.sum(); integer_t transactions_started = m_perProcStartTransaction.sum(); integer_t transactions_ended = m_perProcEndTransaction.sum(); (*m_periodic_output_file_ptr) << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl; (*m_periodic_output_file_ptr) << "total_misses: " << total_misses << " " << m_perProcTotalMisses << endl; (*m_periodic_output_file_ptr) << "instruction_executed: " << instruction_executed << " " << perProcInstructionCount << endl; - (*m_periodic_output_file_ptr) << "simics_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl; + (*m_periodic_output_file_ptr) << "cycles_executed: " << cycles_executed << " " << perProcCycleCount << endl; (*m_periodic_output_file_ptr) << "transactions_started: " << transactions_started << " " << m_perProcStartTransaction << endl; (*m_periodic_output_file_ptr) << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl; (*m_periodic_output_file_ptr) << "L1TBE_usage: " << m_L1tbeProfile << endl; @@ -350,7 +309,7 @@ void Profiler::printStats(ostream& out, bool short_stats) integer_t user_misses = m_perProcUserMisses.sum(); integer_t supervisor_misses = m_perProcSupervisorMisses.sum(); integer_t instruction_executed = perProcInstructionCount.sum(); - integer_t simics_cycles_executed = perProcCycleCount.sum(); + integer_t cycles_executed = perProcCycleCount.sum(); integer_t transactions_started = m_perProcStartTransaction.sum(); integer_t transactions_ended = m_perProcEndTransaction.sum(); @@ -364,7 +323,7 @@ void Profiler::printStats(ostream& out, bool short_stats) out << "supervisor_misses: " << supervisor_misses << " " << m_perProcSupervisorMisses << endl; out << endl; out << "instruction_executed: " << instruction_executed << " " << perProcInstructionCount << endl; - out << "simics_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl; + out << "cycles_executed: " << cycles_executed << " " << perProcCycleCount << endl; out << "cycles_per_instruction: " << (RubyConfig::numberOfProcessors()*double(ruby_cycles))/double(instruction_executed) << " " << perProcCPI << endl; out << "misses_per_thousand_instructions: " << 1000.0 * (double(total_misses) / double(instruction_executed)) << " " << perProcMissesPerInsn << endl; out << endl; @@ -520,304 +479,6 @@ void Profiler::printStats(ostream& out, bool short_stats) } } - if (XACT_MEMORY){ - // Transactional Memory stats - out << "Transactional Memory Stats:" << endl; - out << "------- xact --------" << endl; - out << "xact_size_dist: " << m_xactSizes << endl; - out << "xact_instr_count: " << m_xactInstrCount << endl; - out << "xact_time_dist: " << m_xactCycles << endl; - out << "xact_log_size_dist: " << m_xactLogs << endl; - out << "xact_read_set_size_dist: " << m_xactReads << endl; - out << "xact_write_set_size_dist: " << m_xactWrites << endl; - out << "xact_overflow_read_lines_dist: " << m_xactOverflowReads << endl; - out << "xact_overflow_write_lines_dist: " << m_xactOverflowWrites << endl; - out << "xact_overflow_read_set_size_dist: " << m_xactOverflowTotalReads << endl; - out << "xact_overflow_write_set_size_dist: " << m_xactOverflowTotalWrites << endl; - out << "xact_miss_load_dist: " << m_xactLoadMisses << endl; - out << "xact_miss_store_dist: " << m_xactStoreMisses << endl; - out << "xact_nacked: " << m_xactNacked << endl; - out << "xact_retries: " << m_xactRetries << endl; - out << "xact_abort_delays: " << m_abortDelays << endl; - out << "xact_aborts: " << m_transactionAborts << endl; - if (ATMTP_ENABLED) { - out << "xact_log_overflows: " << m_transactionLogOverflows << endl; - out << "xact_cache_overflows: " << m_transactionCacheOverflows << endl; - out << "xact_unsup_inst_aborts: " << m_transactionUnsupInsts << endl; - out << "xact_save_rest_aborts: " << m_transactionSaveRestAborts << endl; - } - out << "xact_writebacks: " << m_transWBs << endl; - out << "xact_extra_wbs: " << m_extraWBs << endl; - out << "xact_handler_startup_delay: " << m_abortStarupDelay << endl; - out << "xact_handler_per_block_delay: " << m_abortPerBlockDelay << endl; - out << "xact_inferred_aborts: " << m_inferredAborts << endl; - //out << "xact_histogram: " << m_procsInXact << endl; - - if (!short_stats) { - Vector<int> nackedXIDKeys = m_nackXIDMap_ptr->keys(); - nackedXIDKeys.sortVector(); - out << endl; - int total_nacks = 0; - out << "------- xact Nacks by XID --------" << endl; - for(int i=0; i<nackedXIDKeys.size(); i++) { - int key = nackedXIDKeys[i]; - int count = m_nackXIDMap_ptr->lookup(key); - total_nacks += count; - out << "xact " << key << " " - << setw(6) << dec << count - << endl; - } - out << "Total Nacks: " << total_nacks << endl; - out << "---------------" << endl; - out << endl; - - // Print XID Nack Pairs - Vector<int> nackedXIDPairKeys = m_nackXIDPairMap_ptr->keys(); - nackedXIDPairKeys.sortVector(); - out << endl; - total_nacks = 0; - out << "------- xact Nacks by XID Pairs --------" << endl; - for(int i=0; i<nackedXIDPairKeys.size(); i++) { - int key = nackedXIDPairKeys[i]; - Map<int, int> * my_map = m_nackXIDPairMap_ptr->lookup(key); - Vector<int> my_keys = my_map->keys(); - my_keys.sortVector(); - for(int j=0; j<my_keys.size(); j++){ - int nid = my_keys[j]; - int count = my_map->lookup(nid); - total_nacks += count; - out << "xact " << key << " nacked by xact " << nid << " " - << setw(6) << dec << count - << endl; - } - } - out << "Total Nacks: " << total_nacks << endl; - out << "---------------" << endl; - out << endl; - - - Vector<Address> nackedPCKeys = m_nackPCMap_ptr->keys(); - nackedPCKeys.sortVector(); - out << endl; - out << "------- xact Nacks by PC --------" << endl; - for(int i=0; i<nackedPCKeys.size(); i++) { - Address key = nackedPCKeys[i]; - int count = m_nackPCMap_ptr->lookup(key); - out << "xact_Nack " << key << " " - << setw(4) << dec << count - << endl; - } - out << "---------------" << endl; - out << endl; - - - Vector<int> xactExceptionKeys = m_xactExceptionMap_ptr->keys(); - xactExceptionKeys.sortVector(); - out << "------- xact exceptions --------" << endl; - for(int i=0; i<xactExceptionKeys.size(); i++) { - int key = xactExceptionKeys[i]; - int count = m_xactExceptionMap_ptr->lookup(key); - out << "xact_exception(" - << hex << key << "):" - << setw(4) << dec << count - << endl; - } - out << endl; - out << "---------------" << endl; - out << endl; - - Vector<int> abortIDKeys = m_abortIDMap_ptr->keys(); - abortIDKeys.sortVector(); - out << "------- xact abort by XID --------" << endl; - for(int i=0; i<abortIDKeys.size(); i++) { - int count = m_abortIDMap_ptr->lookup(abortIDKeys[i]); - out << "xact_aborts(" - << dec << abortIDKeys[i] << "):" - << setw(7) << count - << endl; - } - out << endl; - out << "---------------" << endl; - out << endl; - - Vector<Address> abortedPCKeys = m_abortPCMap_ptr->keys(); - abortedPCKeys.sortVector(); - out << endl; - out << "------- xact Aborts by PC --------" << endl; - for(int i=0; i<abortedPCKeys.size(); i++) { - Address key = abortedPCKeys[i]; - int count = m_abortPCMap_ptr->lookup(key); - out << "xact_abort_pc " << key - << setw(4) << dec << count - << endl; - } - out << "---------------" << endl; - out << endl; - - Vector<Address> abortedAddrKeys = m_abortAddressMap_ptr->keys(); - abortedAddrKeys.sortVector(); - out << endl; - out << "------- xact Aborts by Address --------" << endl; - for(int i=0; i<abortedAddrKeys.size(); i++) { - Address key = abortedAddrKeys[i]; - int count = m_abortAddressMap_ptr->lookup(key); - out << "xact_abort_address " << key - << setw(4) << dec << count - << endl; - } - out << "---------------" << endl; - out << endl; - } // !short_stats - - Vector<int> commitIDKeys = m_commitIDMap_ptr->keys(); - commitIDKeys.sortVector(); - out << "------- xact Commit Stats by XID --------" << endl; - for(int i=0; i<commitIDKeys.size(); i++) { - int count = m_commitIDMap_ptr->lookup(commitIDKeys[i]); - double retry_count = (double)m_xactRetryIDMap_ptr->lookup(commitIDKeys[i]) / count; - double cycles_count = (double)m_xactCyclesIDMap_ptr->lookup(commitIDKeys[i]) / count; - double readset_count = (double)m_xactReadSetIDMap_ptr->lookup(commitIDKeys[i]) / count; - double writeset_count = (double)m_xactWriteSetIDMap_ptr->lookup(commitIDKeys[i]) / count; - double loadmiss_count = (double)m_xactLoadMissIDMap_ptr->lookup(commitIDKeys[i]) / count; - double storemiss_count = (double)m_xactStoreMissIDMap_ptr->lookup(commitIDKeys[i]) / count; - double instr_count = (double)m_xactInstrCountIDMap_ptr->lookup(commitIDKeys[i]) / count; - out << "xact_stats id: " - << dec << commitIDKeys[i] - << " count: " << setw(7) << count - << " Cycles: " << setw(7) << cycles_count - << " Instr: " << setw(7) << instr_count - << " ReadSet: " << setw(7) << readset_count - << " WriteSet: " << setw(7) << writeset_count - << " LoadMiss: " << setw(7) << loadmiss_count - << " StoreMiss: " << setw(7) << storemiss_count - << " Retry Count: " << setw(7) << retry_count - << endl; - } - out << endl; - out << "---------------" << endl; - out << endl; - - if (!short_stats) { - Vector<int> procsInXactKeys = m_procsInXactMap_ptr->keys(); - procsInXactKeys.sortVector(); - out << "------- xact histogram --------" << endl; - for(int i=0; i<procsInXactKeys.size(); i++) { - int count = m_procsInXactMap_ptr->lookup(procsInXactKeys[i]); - int key = procsInXactKeys[i]; - out << "xact_histogram(" - << dec << key << "):" - << setw(8) << count - << endl; - } - out << endl; - out << "---------------" << endl; - out << endl; - - // Read/Write set Bloom filter stats - //int false_reads = 0; - long long int false_reads = m_readSetNoMatch; - Vector<Address> fp_read_keys = m_readSetNoMatch_ptr->keys(); - out << "------- xact read set false positives -------" << endl; - for(int i=0; i < fp_read_keys.size(); ++i){ - int count = m_readSetNoMatch_ptr->lookup(fp_read_keys[i]); - //out << "read_false_positive( " << fp_read_keys[i] << " ): " - // << setw(8) << dec << count << endl; - false_reads += count; - } - out << "Total read set false positives : " << setw(8) << false_reads << endl; - out << "-----------------------" << endl; - out << endl; - - //int matching_reads = 0; - long long int matching_reads = m_readSetMatch; - long long int empty_checks = m_readSetEmptyChecks; - Vector<Address> read_keys = m_readSetMatch_ptr->keys(); - out << "------- xact read set matches -------" << endl; - for(int i=0; i < read_keys.size(); ++i){ - int count = m_readSetMatch_ptr->lookup(read_keys[i]); - //out << "read_match( " << read_keys[i] << " ): " - // << setw(8) << dec << count << endl; - matching_reads += count; - } - out << "Total read set matches : " << setw(8) << matching_reads << endl; - out << "Total read set empty checks : " << setw(8) << empty_checks << endl; - double false_positive_pct = 0.0; - if((false_reads + matching_reads)> 0){ - false_positive_pct = (1.0*false_reads)/(false_reads+matching_reads)*100.0; - } - out << "Read set false positives rate : " << false_positive_pct << "%" << endl; - out << "-----------------------" << endl; - out << endl; - - // for write set - //int false_writes = 0; - long long int false_writes = m_writeSetNoMatch; - Vector<Address> fp_write_keys = m_writeSetNoMatch_ptr->keys(); - out << "------- xact write set false positives -------" << endl; - for(int i=0; i < fp_write_keys.size(); ++i){ - int count = m_writeSetNoMatch_ptr->lookup(fp_write_keys[i]); - //out << "write_false_positive( " << fp_write_keys[i] << " ): " - // << setw(8) << dec << count << endl; - false_writes += count; - } - out << "Total write set false positives : " << setw(8) << false_writes << endl; - out << "-----------------------" << endl; - out << endl; - - //int matching_writes = 0; - long long int matching_writes = m_writeSetMatch; - empty_checks = m_writeSetEmptyChecks; - Vector<Address> write_keys = m_writeSetMatch_ptr->keys(); - out << "------- xact write set matches -------" << endl; - for(int i=0; i < write_keys.size(); ++i){ - int count = m_writeSetMatch_ptr->lookup(write_keys[i]); - //out << "write_match( " << write_keys[i] << " ): " - // << setw(8) << dec << count << endl; - matching_writes += count; - } - out << "Total write set matches : " << setw(8) << matching_writes << endl; - out << "Total write set empty checks : " << setw(8) << empty_checks << endl; - false_positive_pct = 0.0; - if((matching_writes+false_writes) > 0){ - false_positive_pct = (1.0*false_writes)/(false_writes+matching_writes)*100.0; - } - out << "Write set false positives rate : " << false_positive_pct << "%" << endl; - out << "-----------------------" << endl; - out << endl; - - out << "----- Xact Signature Stats ------" << endl; - Vector<int> xids = m_xactReadFilterBitsSetOnCommit->keys(); - for(int i=0; i < xids.size(); ++i){ - int xid = xids[i]; - out << "xid " << xid << " Read set bits set on commit: " << (m_xactReadFilterBitsSetOnCommit->lookup(xid)) << endl; - } - xids = m_xactWriteFilterBitsSetOnCommit->keys(); - for(int i=0; i < xids.size(); ++i){ - int xid = xids[i]; - out << "xid " << xid << " Write set bits set on commit: " << (m_xactWriteFilterBitsSetOnCommit->lookup(xid)) << endl; - } - xids = m_xactReadFilterBitsSetOnAbort->keys(); - for(int i=0; i < xids.size(); ++i){ - int xid = xids[i]; - out << "xid " << xid << " Read set bits set on abort: " << (m_xactReadFilterBitsSetOnAbort->lookup(xid)) << endl; - } - xids = m_xactWriteFilterBitsSetOnAbort->keys(); - for(int i=0; i < xids.size(); ++i){ - int xid = xids[i]; - out << "xid " << xid << " Write set bits set on abort: " << (m_xactWriteFilterBitsSetOnAbort->lookup(xid)) << endl; - } - out << endl; - - cout << "------- WATCHPOINTS --------" << endl; - cout << "False Triggers : " << m_watchpointsFalsePositiveTrigger << endl; - cout << "True Triggers : " << m_watchpointsTrueTrigger << endl; - cout << "Total Triggers : " << m_watchpointsTrueTrigger + m_watchpointsFalsePositiveTrigger << endl; - cout << "---------------" << endl; - cout << endl; - } // !short_stats - //m_xact_profiler_ptr->printStats(out, short_stats); // gem5:Arka for decomissioning of log_tm - } // XACT_MEMORY - if (!short_stats) { out << "Request vs. RubySystem State Profile" << endl; out << "--------------------------------" << endl; @@ -993,75 +654,6 @@ void Profiler::clearStats() m_L1D_cache_profiler_ptr->clearStats(); m_L1I_cache_profiler_ptr->clearStats(); m_L2_cache_profiler_ptr->clearStats(); - //m_xact_profiler_ptr->clearStats(); //gem5:Arka for decomissiong of log_tm - - //---- begin XACT_MEM code - ASSERT(m_xactExceptionMap_ptr != NULL); - ASSERT(m_procsInXactMap_ptr != NULL); - ASSERT(m_abortIDMap_ptr != NULL); - ASSERT(m_abortPCMap_ptr != NULL); - ASSERT( m_nackXIDMap_ptr != NULL); - ASSERT(m_nackPCMap_ptr != NULL); - - m_abortStarupDelay = -1; - m_abortPerBlockDelay = -1; - m_transWBs = 0; - m_extraWBs = 0; - m_transactionAborts = 0; - m_transactionLogOverflows = 0; - m_transactionCacheOverflows = 0; - m_transactionUnsupInsts = 0; - m_transactionSaveRestAborts = 0; - m_inferredAborts = 0; - m_xactNacked = 0; - - m_xactLogs.clear(); - m_xactCycles.clear(); - m_xactReads.clear(); - m_xactWrites.clear(); - m_xactSizes.clear(); - m_abortDelays.clear(); - m_xactRetries.clear(); - m_xactOverflowReads.clear(); - m_xactOverflowWrites.clear(); - m_xactLoadMisses.clear(); - m_xactStoreMisses.clear(); - m_xactOverflowTotalReads.clear(); - m_xactOverflowTotalWrites.clear(); - - m_xactExceptionMap_ptr->clear(); - m_procsInXactMap_ptr->clear(); - m_abortIDMap_ptr->clear(); - m_commitIDMap_ptr->clear(); - m_xactRetryIDMap_ptr->clear(); - m_xactCyclesIDMap_ptr->clear(); - m_xactReadSetIDMap_ptr->clear(); - m_xactWriteSetIDMap_ptr->clear(); - m_xactLoadMissIDMap_ptr->clear(); - m_xactStoreMissIDMap_ptr->clear(); - m_xactInstrCountIDMap_ptr->clear(); - m_abortPCMap_ptr->clear(); - m_abortAddressMap_ptr->clear(); - m_nackXIDMap_ptr->clear(); - m_nackXIDPairMap_ptr->clear(); - m_nackPCMap_ptr->clear(); - - m_xactReadFilterBitsSetOnCommit->clear(); - m_xactReadFilterBitsSetOnAbort->clear(); - m_xactWriteFilterBitsSetOnCommit->clear(); - m_xactWriteFilterBitsSetOnAbort->clear(); - - m_readSetEmptyChecks = 0; - m_readSetMatch = 0; - m_readSetNoMatch = 0; - m_writeSetEmptyChecks = 0; - m_writeSetMatch = 0; - m_writeSetNoMatch = 0; - - m_xact_visualizer_last = 0; - m_watchpointsFalsePositiveTrigger = 0; - m_watchpointsTrueTrigger = 0; - //---- end XACT_MEM code // for MemoryControl: m_memReq = 0; @@ -1357,6 +949,27 @@ void Profiler::profileTrainingMask(const Set& pred_set) m_explicit_training_mask.add(pred_set.count()); } +// For MemoryControl: +void Profiler::profileMemReq(int bank) { + m_memReq++; + m_memBankCount[bank]++; +} + +void Profiler::profileMemBankBusy() { m_memBankBusy++; } +void Profiler::profileMemBusBusy() { m_memBusBusy++; } +void Profiler::profileMemReadWriteBusy() { m_memReadWriteBusy++; } +void Profiler::profileMemDataBusBusy() { m_memDataBusBusy++; } +void Profiler::profileMemTfawBusy() { m_memTfawBusy++; } +void Profiler::profileMemRefresh() { m_memRefresh++; } +void Profiler::profileMemRead() { m_memRead++; } +void Profiler::profileMemWrite() { m_memWrite++; } +void Profiler::profileMemWaitCycles(int cycles) { m_memWaitCycles += cycles; } +void Profiler::profileMemInputQ(int cycles) { m_memInputQ += cycles; } +void Profiler::profileMemBankQ(int cycles) { m_memBankQ += cycles; } +void Profiler::profileMemArbWait(int cycles) { m_memArbWait += cycles; } +void Profiler::profileMemRandBusy() { m_memRandBusy++; } +void Profiler::profileMemNotOld() { m_memNotOld++; } + int64 Profiler::getTotalInstructionsExecuted() const { int64 sum = 1; // Starting at 1 allows us to avoid division by zero @@ -1410,885 +1023,3 @@ GenericRequestType Profiler::CacheRequestType_to_GenericRequestType(const CacheR } } -//---- begin Transactional Memory CODE -void Profiler::profileTransaction(int size, int logSize, int readS, int writeS, int overflow_readS, int overflow_writeS, int retries, int useful_cycles, bool nacked, int loadMisses, int storeMisses, int instrCount, int xid){ - m_xactLogs.add(logSize); - m_xactSizes.add(size); - m_xactReads.add(readS); - m_xactWrites.add(writeS); - m_xactRetries.add(retries); - m_xactCycles.add(useful_cycles); - m_xactLoadMisses.add(loadMisses); - m_xactStoreMisses.add(storeMisses); - m_xactInstrCount.add(instrCount); - - // was this transaction nacked? - if(nacked){ - m_xactNacked++; - } - - // for overflowed transactions - if(overflow_readS > 0 || overflow_writeS > 0){ - m_xactOverflowReads.add(overflow_readS); - m_xactOverflowWrites.add(overflow_writeS); - m_xactOverflowTotalReads.add(readS); - m_xactOverflowTotalWrites.add(writeS); - } - - // Record commits by xid - if(!m_commitIDMap_ptr->exist(xid)){ - m_commitIDMap_ptr->add(xid, 1); - m_xactRetryIDMap_ptr->add(xid, retries); - m_xactCyclesIDMap_ptr->add(xid, useful_cycles); - m_xactReadSetIDMap_ptr->add(xid, readS); - m_xactWriteSetIDMap_ptr->add(xid, writeS); - m_xactLoadMissIDMap_ptr->add(xid, loadMisses); - m_xactStoreMissIDMap_ptr->add(xid, storeMisses); - m_xactInstrCountIDMap_ptr->add(xid, instrCount); - } else { - (m_commitIDMap_ptr->lookup(xid))++; - (m_xactRetryIDMap_ptr->lookup(xid)) += retries; - (m_xactCyclesIDMap_ptr->lookup(xid)) += useful_cycles; - (m_xactReadSetIDMap_ptr->lookup(xid)) += readS; - (m_xactWriteSetIDMap_ptr->lookup(xid)) += writeS; - (m_xactLoadMissIDMap_ptr->lookup(xid)) += loadMisses; - (m_xactStoreMissIDMap_ptr->lookup(xid)) += storeMisses; - (m_xactInstrCountIDMap_ptr->lookup(xid)) += instrCount; - } -} - -void Profiler::profileBeginTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen){ - //- if(PROFILE_XACT){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 2)){ - const char* openStr = isOpen ? " OPEN" : " CLOSED"; - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - // The actual processor number - int proc_no = id*RubyConfig::numberofSMTThreads() + thread; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid - << " XACT BEGIN " << xid - << " PC 0x" << hex << pc.getAddress() - << dec - << " *PC 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << openStr - << endl; - } -} - -void Profiler::profileCommitTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen){ - //- if(PROFILE_XACT){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 2)){ - const char* openStr = isOpen ? " OPEN" : " CLOSED"; - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - // The actual processor number - int proc_no = id*RubyConfig::numberofSMTThreads() + thread; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid - << " XACT COMMIT " << xid - << " PC 0x" << hex << pc.getAddress() - << dec - << " *PC 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << openStr - << endl; - } - -} - -// for profiling overflows -void Profiler::profileLoadOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow){ - //- if(PROFILE_XACT){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - string overflow_str = " XACT LOAD L1 OVERFLOW "; - if(!l1_overflow){ - overflow_str = " XACT LOAD L2 OVERFLOW "; - } - // The actual processor number - int proc_no = id*RubyConfig::numberofSMTThreads() + thread; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid - << overflow_str << xid - << " ADDR " << addr - << endl; - } -} - -// for profiling overflows -void Profiler::profileStoreOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow){ - //- if(PROFILE_XACT){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - string overflow_str = " XACT STORE L1 OVERFLOW "; - if(!l1_overflow){ - overflow_str = " XACT STORE L2 OVERFLOW "; - } - // The actual processor number - int proc_no = id*RubyConfig::numberofSMTThreads() + thread; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid - << overflow_str << xid - << " ADDR " << addr - << endl; - } -} - -void Profiler::profileLoadTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc){ - //- if(PROFILE_XACT){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 3)){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - // The actual processor number - int proc_no = id*RubyConfig::numberofSMTThreads() + thread; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid - << " XACT LOAD " << xid - << " " << addr - << " VA " << logicalAddress - << " PC " << pc - << " *PC 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - //<< " VAL 0x" << hex << SIMICS_read_physical_memory(proc_no, SIMICS_translate_data_address(proc_no, logicalAddress), 4) << dec - << " VAL 0x" << hex << g_system_ptr->getDriver()->readPhysicalMemory(proc_no, addr.getAddress(), 4) << dec - << endl; - } -} - -void Profiler::profileLoad(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc){ - if(PROFILE_NONXACT){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - // The actual processor number - int proc_no = id*RubyConfig::numberofSMTThreads() + thread; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid - << " LOAD " << xid - << " " << addr - << " VA " << logicalAddress - << " PC " << pc - //<< " VAL 0x" << hex << SIMICS_read_physical_memory(proc_no, SIMICS_translate_data_address(proc_no, logicalAddress), 4) << dec - << " VAL 0x" << hex << g_system_ptr->getDriver()->readPhysicalMemory(proc_no, addr.getAddress(), 4) << dec - << endl; - } -} - -void Profiler::profileStoreTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc){ - //- if(PROFILE_XACT){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 3)){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - // The actual processor number - int proc_no = id*RubyConfig::numberofSMTThreads() + thread; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid - << " XACT STORE " << xid - << " " << addr - << " VA " << logicalAddress - << " PC " << pc - << " *PC 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << endl; - } -} - -void Profiler::profileStore(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc){ - if(PROFILE_NONXACT){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - // The actual processor number - int proc_no = id*RubyConfig::numberofSMTThreads() + thread; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid - << " STORE " << xid - << " " << addr - << " VA " << logicalAddress - << " PC " << pc - << endl; - } -} - -void Profiler::profileNack(NodeID id, int tid, int xid, int thread, int nacking_thread, NodeID nackedBy, Address addr, Address logicalAddress, Address pc, uint64 seq_ts, uint64 nack_ts, bool possibleCycle){ - int nid = 0; // g_system_ptr->getChip(nackedBy/RubyConfig::numberOfProcsPerChip())->getTransactionInterfaceManager(nackedBy%RubyConfig::numberOfProcsPerChip())->getXID(nacking_thread); - assert(0); - //- if(PROFILE_XACT){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - // The actual processor number - int proc_no = id*g_NUM_SMT_THREADS + thread; - int nack_proc_no = nackedBy*g_NUM_SMT_THREADS + nacking_thread; - Address nack_pc = SIMICS_get_program_counter(nack_proc_no); - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid - << " XACT NACK " << xid - << " by " << nack_proc_no - << " [ " << nackedBy - << ", " << nacking_thread - << " ]" - << " NID: " << nid - << " " << addr - << " VA " << logicalAddress - << " PC " << pc - << " *PC 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << " NackerPC " << nack_pc - << " my_ts " << seq_ts - << " nack_ts " << nack_ts - << " possible_cycle " << possibleCycle - << endl; - } - - // Record nacks by xid - if(!m_nackXIDMap_ptr->exist(xid)){ - m_nackXIDMap_ptr->add(xid, 1); - } else { - (m_nackXIDMap_ptr->lookup(xid))++; - } - - // Record nack ID pairs by xid - if(!m_nackXIDPairMap_ptr->exist(xid)){ - Map<int, int> * new_map = new Map<int, int>; - new_map->add(nid, 1); - m_nackXIDPairMap_ptr->add(xid, new_map); - } - else{ - // retrieve existing map - Map<int, int> * my_map = m_nackXIDPairMap_ptr->lookup(xid); - if(!my_map->exist(nid)){ - my_map->add(nid, 1); - } - else{ - (my_map->lookup(nid))++; - } - } - - // Record nacks by pc - if(!m_nackPCMap_ptr->exist(pc)){ - m_nackPCMap_ptr->add(pc, 1); - } else { - (m_nackPCMap_ptr->lookup(pc))++; - } -} - -void Profiler::profileExposedConflict(NodeID id, int xid, int thread, Address addr, Address pc){ - //if(PROFILE_XACT){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - // The actual processor number - int proc_no = id*g_NUM_SMT_THREADS + thread; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " " - << " EXPOSED ACTION CONFLICT " << xid - << " ADDR " << addr - << " PC " << pc - << endl; - //} -} - -void Profiler::profileInferredAbort(){ - m_inferredAborts++; -} - -void Profiler::profileAbortDelayConstants(int startupDelay, int perBlock){ - m_abortStarupDelay = startupDelay; - m_abortPerBlockDelay = perBlock; -} - -void Profiler::profileAbortTransaction(NodeID id, int tid, int xid, int thread, int delay, int abortingThread, int abortingProc, Address addr, Address pc){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - int abortingXID = -1; - // The actual processor number - int proc_no = id*g_NUM_SMT_THREADS + thread; - // we are passed in physical proc number. Compute logical abort proc_no - int logical_abort_proc_no = abortingProc/g_NUM_SMT_THREADS; - if(abortingProc >= 0){ - AbstractChip * c = g_system_ptr->getChip(logical_abort_proc_no/RubyConfig::numberOfProcsPerChip()); - abortingXID = 0; // c->getTransactionInterfaceManager(logical_abort_proc_no%RubyConfig::numberOfProcsPerChip())->getXID(abortingThread); - assert(0); - } - //- if(PROFILE_XACT){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid - << " XACT ABORT " << xid - << " caused by " << abortingProc - << " [ " << logical_abort_proc_no - << ", " << abortingThread - << " ]" - << " xid: " << abortingXID << " " - << " address: " << addr - << " delay: " << delay - << " PC " << pc - << " *PC 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << endl; - } - m_transactionAborts++; - - // Record aborts by xid - if(!m_abortIDMap_ptr->exist(xid)){ - m_abortIDMap_ptr->add(xid, 1); - } else { - (m_abortIDMap_ptr->lookup(xid))++; - } - m_abortDelays.add(delay); - - // Record aborts by pc - if(!m_abortPCMap_ptr->exist(pc)){ - m_abortPCMap_ptr->add(pc, 1); - } else { - (m_abortPCMap_ptr->lookup(pc))++; - } - - // Record aborts by address - if(!m_abortAddressMap_ptr->exist(addr)){ - m_abortAddressMap_ptr->add(addr, 1); - } else { - (m_abortAddressMap_ptr->lookup(addr))++; - } -} - -void Profiler::profileTransWB(){ - m_transWBs++; -} - -void Profiler::profileExtraWB(){ - m_extraWBs++; -} - -void Profiler::profileXactChange(int procs, int cycles){ - if(!m_procsInXactMap_ptr->exist(procs)){ - m_procsInXactMap_ptr->add(procs, cycles); - } else { - (m_procsInXactMap_ptr->lookup(procs)) += cycles; - } -} - -void Profiler::profileReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread){ - // do NOT count instances when signature is empty! - if(!bf_filter_result && !perfect_filter_result){ - m_readSetEmptyChecks++; - return; - } - - if(bf_filter_result != perfect_filter_result){ - m_readSetNoMatch++; - /* - // we have a false positive - if(!m_readSetNoMatch_ptr->exist(addr)){ - m_readSetNoMatch_ptr->add(addr, 1); - } - else{ - (m_readSetNoMatch_ptr->lookup(addr))++; - } - */ - } - else{ - m_readSetMatch++; - /* - // Bloom filter agrees with perfect filter - if(!m_readSetMatch_ptr->exist(addr)){ - m_readSetMatch_ptr->add(addr, 1); - } - else{ - (m_readSetMatch_ptr->lookup(addr))++; - } - */ - } -} - - -void Profiler::profileRemoteReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread){ - if(bf_filter_result != perfect_filter_result){ - // we have a false positive - if(!m_remoteReadSetNoMatch_ptr->exist(addr)){ - m_remoteReadSetNoMatch_ptr->add(addr, 1); - } - else{ - (m_remoteReadSetNoMatch_ptr->lookup(addr))++; - } - } - else{ - // Bloom filter agrees with perfect filter - if(!m_remoteReadSetMatch_ptr->exist(addr)){ - m_remoteReadSetMatch_ptr->add(addr, 1); - } - else{ - (m_remoteReadSetMatch_ptr->lookup(addr))++; - } - } -} - -void Profiler::profileWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread){ - // do NOT count instances when signature is empty! - if(!bf_filter_result && !perfect_filter_result){ - m_writeSetEmptyChecks++; - return; - } - - if(bf_filter_result != perfect_filter_result){ - m_writeSetNoMatch++; - /* - // we have a false positive - if(!m_writeSetNoMatch_ptr->exist(addr)){ - m_writeSetNoMatch_ptr->add(addr, 1); - } - else{ - (m_writeSetNoMatch_ptr->lookup(addr))++; - } - */ - } - else{ - m_writeSetMatch++; - /* - // Bloom filter agrees with perfect filter - if(!m_writeSetMatch_ptr->exist(addr)){ - m_writeSetMatch_ptr->add(addr, 1); - } - else{ - (m_writeSetMatch_ptr->lookup(addr))++; - } - */ - } -} - - -void Profiler::profileRemoteWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread){ - if(bf_filter_result != perfect_filter_result){ - // we have a false positive - if(!m_remoteWriteSetNoMatch_ptr->exist(addr)){ - m_remoteWriteSetNoMatch_ptr->add(addr, 1); - } - else{ - (m_remoteWriteSetNoMatch_ptr->lookup(addr))++; - } - } - else{ - // Bloom filter agrees with perfect filter - if(!m_remoteWriteSetMatch_ptr->exist(addr)){ - m_remoteWriteSetMatch_ptr->add(addr, 1); - } - else{ - (m_remoteWriteSetMatch_ptr->lookup(addr))++; - } - } -} - -void Profiler::profileTransactionLogOverflow(NodeID id, Address addr, Address pc){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << id << " " - << " XACT LOG OVERFLOW" - << " ADDR " << addr - << " PC " << pc - << " *PC 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << endl; - - } - m_transactionLogOverflows++; -} - -void Profiler::profileTransactionCacheOverflow(NodeID id, Address addr, Address pc){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << id << " " - << " XACT CACHE OVERFLOW " - << " ADDR " << addr - << " PC " << pc - << " *PC 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << endl; - - } - m_transactionCacheOverflows++; -} - -void Profiler::profileGetCPS(NodeID id, uint32 cps, Address pc){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << id << " " - << " XACT GET CPS" - << " PC " << pc - << " *PC 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << " CPS 0x" << hex << cps << dec - << endl; - } -} -//---- end Transactional Memory CODE - - -void Profiler::profileExceptionStart(bool xact, NodeID id, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc){ - if(xact){ - if(!m_xactExceptionMap_ptr->exist(val)){ - m_xactExceptionMap_ptr->add(val, 1); - } else { - (m_xactExceptionMap_ptr->lookup(val))++; - } - } - - if (!xact && !PROFILE_NONXACT) return; - - if(PROFILE_EXCEPTIONS){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - // The actual processor number - int proc_no = id*g_NUM_SMT_THREADS + thread; - - // get the excepting instruction - const char * instruction; - physical_address_t addr = SIMICS_translate_address( proc_no, Address(pc)); - if(val != 0x64 && addr != 0x0){ - // ignore instruction TLB miss - instruction = SIMICS_disassemble_physical( proc_no, addr ); - } - - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << " ]" << " "; - if (xact) - (* debug_cout_ptr) << " XACT Exception("; - else - (* debug_cout_ptr) << " Exception("; - - (* debug_cout_ptr) << hex << val << dec << ")_START--Trap Level " << trap_level - << "--(PC=0x" << hex << pc << ", " << npc << ")" - << dec; - - if(val != 0x64 && addr != 0x0){ - (* debug_cout_ptr) << " instruction = " << instruction; - } - else{ - (* debug_cout_ptr) << " instruction = INSTRUCTION TLB MISS"; - } - (* debug_cout_ptr) << dec << endl; - } -} - -void Profiler::profileExceptionDone(bool xact, NodeID id, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc, uinteger_t tpc, uinteger_t tnpc){ - if (!xact && !PROFILE_NONXACT) return; - - if (PROFILE_EXCEPTIONS){ - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - // The actual processor number - int proc_no = id*g_NUM_SMT_THREADS + thread; - - // get the excepting instruction - const char * instruction; - instruction = SIMICS_disassemble_physical( proc_no, SIMICS_translate_address( proc_no, Address(pc) ) ); - - - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << " ]" << " "; - if (xact) - (* debug_cout_ptr) << " XACT Exception("; - else - (* debug_cout_ptr) << " Exception("; - - (* debug_cout_ptr) << hex << val << dec << ")_DONE--Trap Level " << trap_level - << "--(PC=0x" << hex << pc << ", " << npc << dec << ")" - << "--(TPC=0x" << hex << tpc << ", " << tnpc << dec << ")" - << endl; - } -} - -void Profiler::rubyWatch(int id){ - int rn_g1 = SIMICS_get_register_number(id, "g1"); - uint64 tr = SIMICS_read_register(id, rn_g1); - Address watch_address = Address(tr); - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << id << " " - << "RUBY WATCH " - << watch_address - << endl; - - if(!m_watch_address_list_ptr->exist(watch_address)){ - m_watch_address_list_ptr->add(watch_address, 1); - } -} - -bool Profiler::watchAddress(Address addr){ - if (m_watch_address_list_ptr->exist(addr)) - return true; - else - return false; -} - -void Profiler::profileReadFilterBitsSet(int xid, int bits, bool isCommit) { - if (isCommit) { - if(!m_xactReadFilterBitsSetOnCommit->exist(xid)){ - Histogram hist; - hist.add(bits); - m_xactReadFilterBitsSetOnCommit->add(xid, hist); - } - else{ - (m_xactReadFilterBitsSetOnCommit->lookup(xid)).add(bits); - } - } else { - if(!m_xactReadFilterBitsSetOnAbort->exist(xid)){ - Histogram hist; - hist.add(bits); - m_xactReadFilterBitsSetOnAbort->add(xid, hist); - } - else{ - (m_xactReadFilterBitsSetOnAbort->lookup(xid)).add(bits); - } - } -} - -void Profiler::profileWriteFilterBitsSet(int xid, int bits, bool isCommit) { - if (isCommit) { - if(!m_xactWriteFilterBitsSetOnCommit->exist(xid)){ - Histogram hist; - hist.add(bits); - m_xactWriteFilterBitsSetOnCommit->add(xid, hist); - } - else{ - (m_xactWriteFilterBitsSetOnCommit->lookup(xid)).add(bits); - } - } else { - if(!m_xactWriteFilterBitsSetOnAbort->exist(xid)){ - Histogram hist; - hist.add(bits); - m_xactWriteFilterBitsSetOnAbort->add(xid, hist); - } - else{ - (m_xactWriteFilterBitsSetOnAbort->lookup(xid)).add(bits); - } - } -} -/* - //gem5:Arka for decomissioning log_tm - -void Profiler::setXactVisualizerFile(char * filename){ - if ( (filename == NULL) || - (!strcmp(filename, "none")) ) { - m_xact_visualizer_ptr = &cout; - return; - } - - if (m_xact_visualizer.is_open() ) { - m_xact_visualizer.close (); - } - m_xact_visualizer.open (filename, std::ios::out); - if (! m_xact_visualizer.is_open() ) { - cerr << "setXactVisualizer: can't open file " << filename << endl; - } - else { - m_xact_visualizer_ptr = &m_xact_visualizer; - } - cout << "setXactVisualizer file " << filename << endl; -} - -void Profiler::printTransactionState(bool can_skip){ - if (!XACT_VISUALIZER) return; - int num_processors = RubyConfig::numberOfProcessors() * RubyConfig::numberofSMTThreads(); - - if (!g_system_ptr->getXactVisualizer()->existXactActivity() && can_skip) - return; - - if (can_skip && ((g_eventQueue_ptr->getTime()/10000) <= m_xact_visualizer_last)) - return; - - Vector<char> xactStateVector = g_system_ptr->getXactVisualizer()->getTransactionStateVector(); - for (int i = 0 ; i < num_processors; i++){ - (* m_xact_visualizer_ptr) << xactStateVector[i] << " "; - } - (* m_xact_visualizer_ptr) << " " << g_eventQueue_ptr->getTime() << endl; - m_xact_visualizer_last = g_eventQueue_ptr->getTime() / 10000; -} -*/ -void Profiler::watchpointsFalsePositiveTrigger() -{ - m_watchpointsFalsePositiveTrigger++; -} - -void Profiler::watchpointsTrueTrigger() -{ - m_watchpointsTrueTrigger++; -} - -// For MemoryControl: -void Profiler::profileMemReq(int bank) { - m_memReq++; - m_memBankCount[bank]++; -} -void Profiler::profileMemBankBusy() { m_memBankBusy++; } -void Profiler::profileMemBusBusy() { m_memBusBusy++; } -void Profiler::profileMemReadWriteBusy() { m_memReadWriteBusy++; } -void Profiler::profileMemDataBusBusy() { m_memDataBusBusy++; } -void Profiler::profileMemTfawBusy() { m_memTfawBusy++; } -void Profiler::profileMemRefresh() { m_memRefresh++; } -void Profiler::profileMemRead() { m_memRead++; } -void Profiler::profileMemWrite() { m_memWrite++; } -void Profiler::profileMemWaitCycles(int cycles) { m_memWaitCycles += cycles; } -void Profiler::profileMemInputQ(int cycles) { m_memInputQ += cycles; } -void Profiler::profileMemBankQ(int cycles) { m_memBankQ += cycles; } -void Profiler::profileMemArbWait(int cycles) { m_memArbWait += cycles; } -void Profiler::profileMemRandBusy() { m_memRandBusy++; } -void Profiler::profileMemNotOld() { m_memNotOld++; } - - -//----------- ATMTP -------------------// - -void Profiler::profileTransactionTCC(NodeID id, Address pc){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - cout.flags(ios::right); - cout << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - cout << setw(ID_SPACES) << id << " " - << " XACT Aborting! Executed TCC " - << " PC: " << pc - << " *PC: 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << endl; - } - m_transactionUnsupInsts++; -} - -void Profiler::profileTransactionUnsupInst(NodeID id, Address pc){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - cout.flags(ios::right); - cout << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - cout << setw(ID_SPACES) << id << " " - << " XACT Aborting! Executed Unsupported Instruction " - << " PC: " << pc - << " *PC: 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << endl; - } - m_transactionUnsupInsts++; -} - -void Profiler::profileTransactionSaveInst(NodeID id, Address pc){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - cout.flags(ios::right); - cout << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - cout << setw(ID_SPACES) << id << " " - << " XACT Aborting! Executed Save Instruction " - << " PC: " << pc - << " *PC: 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << endl; - } - m_transactionSaveRestAborts++; -} - -void Profiler::profileTransactionRestoreInst(NodeID id, Address pc){ - if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ - physical_address_t myPhysPC = SIMICS_translate_address(id, pc); - integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); - const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); - - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - cout.flags(ios::right); - cout << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - cout << setw(ID_SPACES) << id << " " - << " XACT Aborting! Executed Restore Instruction " - << " PC: " << pc - << " *PC: 0x" << hex << myInst << dec - << " '" << myInstStr << "'" - << endl; - } - m_transactionSaveRestAborts++; -} - -void Profiler::profileTimerInterrupt(NodeID id, - uinteger_t tick, uinteger_t tick_cmpr, - uinteger_t stick, uinteger_t stick_cmpr, - int trap_level, - uinteger_t pc, uinteger_t npc, - uinteger_t pstate, int pil){ - if (PROFILE_EXCEPTIONS) { - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - cout.flags(ios::right); - cout << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - cout << setw(ID_SPACES) << id << " "; - cout << hex << "Timer--(Tick=0x" << tick << ", TckCmp=0x" << tick_cmpr - << ", STick=0x" << stick << ", STickCmp=0x" << stick_cmpr - << ")--(PC=" << pc << ", " << npc - << dec << ")--(TL=" << trap_level << ", pil=" << pil - << hex << ", pstate=0x" << pstate - << dec << ")" << endl; - } -} diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index 2961a81d1..aa018029c 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -27,19 +27,19 @@ */ /* - This file has been modified by Kevin Moore and Dan Nussbaum of the - Scalable Systems Research Group at Sun Microsystems Laboratories - (http://research.sun.com/scalable/) to support the Adaptive - Transactional Memory Test Platform (ATMTP). + This file has been modified by Kevin Moore and Dan Nussbaum of the + Scalable Systems Research Group at Sun Microsystems Laboratories + (http://research.sun.com/scalable/) to support the Adaptive + Transactional Memory Test Platform (ATMTP). - Please send email to atmtp-interest@sun.com with feedback, questions, or - to request future announcements about ATMTP. + Please send email to atmtp-interest@sun.com with feedback, questions, or + to request future announcements about ATMTP. - ---------------------------------------------------------------------- + ---------------------------------------------------------------------- - File modification date: 2008-02-23 + File modification date: 2008-02-23 - ---------------------------------------------------------------------- + ---------------------------------------------------------------------- */ /* @@ -68,7 +68,6 @@ #include "Set.hh" #include "CacheRequestType.hh" #include "GenericRequestType.hh" -//#include "XactProfiler.hh" //gem5:Arka for decomissioning og log_tm class CacheMsg; class CacheProfiler; @@ -78,355 +77,229 @@ template <class KEY_TYPE, class VALUE_TYPE> class Map; class Profiler : public Consumer { public: - // Constructors - Profiler(); - - // Destructor - ~Profiler(); - - // Public Methods - void wakeup(); - - void setPeriodicStatsFile(const string& filename); - void setPeriodicStatsInterval(integer_t period); - - void setXactVisualizerFile(char* filename); - - void printStats(ostream& out, bool short_stats=false); - void printShortStats(ostream& out) { printStats(out, true); } - void printTraceStats(ostream& out) const; - void clearStats(); - void printConfig(ostream& out) const; - void printResourceUsage(ostream& out) const; - - AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } - AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } - //XactProfiler* getXactProfiler() { return m_xact_profiler_ptr;} //gem5:Arka for decomissioning og log_tm - - void addPrimaryStatSample(const CacheMsg& msg, NodeID id); - void addSecondaryStatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); - void addSecondaryStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); - void addAddressTraceSample(const CacheMsg& msg, NodeID id); - - void profileRequest(const string& requestStr); - void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner); - - void profileMulticastRetry(const Address& addr, int count); - - void profileFilterAction(int action); - - void profileConflictingRequests(const Address& addr); - void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); } - void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); } - void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); } - - void countBAUnicast() { m_num_BA_unicasts++; } - void countBABroadcast() { m_num_BA_broadcasts++; } - - void recordPrediction(bool wasGood, bool wasPredicted); - - void startTransaction(int cpu); - void endTransaction(int cpu); - void profilePFWait(Time waitTime); - - void controllerBusy(MachineID machID); - void bankBusy(); - void missLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); - void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); - void stopTableUsageSample(int num) { m_stopTableProfile.add(num); } - void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); } - void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); } - void sequencerRequests(int num) { m_sequencer_requests.add(num); } - void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);} - - void profileGetXMaskPrediction(const Set& pred_set); - void profileGetSMaskPrediction(const Set& pred_set); - void profileTrainingMask(const Set& pred_set); - void profileTransition(const string& component, NodeID id, NodeID version, Address addr, - const string& state, const string& event, - const string& next_state, const string& note); - void profileMsgDelay(int virtualNetwork, int delayCycles); - - void print(ostream& out) const; - - int64 getTotalInstructionsExecuted() const; - int64 getTotalTransactionsExecuted() const; - - //---- begin Transactional Memory CODE - #if 0 //gem5:Arka for decomissioning og log_tm - void profileTransCycles(int proc, int cycles) { getXactProfiler()->profileTransCycles(proc, cycles);} - void profileNonTransCycles(int proc, int cycles) { getXactProfiler()->profileNonTransCycles(proc, cycles);} - void profileStallTransCycles(int proc, int cycles) { getXactProfiler()->profileStallTransCycles(proc, cycles); } - void profileStallNonTransCycles(int proc, int cycles) { getXactProfiler()->profileStallNonTransCycles(proc, cycles); } - void profileAbortingTransCycles(int proc, int cycles) { getXactProfiler()->profileAbortingTransCycles(proc, cycles); } - void profileCommitingTransCycles(int proc, int cycles) { getXactProfiler()->profileCommitingTransCycles(proc, cycles); } - void profileBarrierCycles(int proc, int cycles) { getXactProfiler()->profileBarrierCycles(proc, cycles);} - void profileBackoffTransCycles(int proc, int cycles) { getXactProfiler()->profileBackoffTransCycles(proc, cycles); } - void profileGoodTransCycles(int proc, int cycles) {getXactProfiler()->profileGoodTransCycles(proc, cycles); } - - #endif //gem5:Arka TODO clean up the rest of this functions as well - void profileTransaction(int size, int logSize, int readS, int writeS, int overflow_readS, int overflow_writeS, int retries, int cycles, bool nacked, int loadMisses, int storeMisses, int instrCount, int xid); - void profileBeginTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen); - void profileCommitTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen); - void profileLoadTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc); - void profileLoad(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc); - void profileStoreTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc); - void profileStore(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc); - void profileLoadOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow); - void profileStoreOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow); - void profileNack(NodeID id, int tid, int xid, int thread, int nacking_thread, NodeID nackedBy, Address addr, Address logicalAddress, Address pc, uint64 seq_ts, uint64 nack_ts, bool possibleCycle); - void profileExposedConflict(NodeID id, int xid, int thread, Address addr, Address pc); - void profileTransWB(); - void profileExtraWB(); - void profileInferredAbort(); - void profileAbortTransaction(NodeID id, int tid, int xid, int thread, int delay, int abortingThread, int abortingProc, Address addr, Address pc); - void profileExceptionStart(bool xact, NodeID proc_no, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc); - void profileExceptionDone(bool xact, NodeID proc_no, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc, uinteger_t tpc, uinteger_t tnpc); - void profileTimerInterrupt(NodeID id, - uinteger_t tick, uinteger_t tick_cmpr, - uinteger_t stick, uinteger_t stick_cmpr, - int trap_level, - uinteger_t pc, uinteger_t npc, - uinteger_t pstate, int pil); - - void profileAbortDelayConstants(int handlerStartupDelay, int handlerPerBlockDelay); - void profileXactChange(int procs, int cycles); - void profileReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread); - void profileWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread); - void profileRemoteReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread); - void profileRemoteWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread); - - - void profileReadFilterBitsSet(int xid, int bits, bool isCommit); - void profileWriteFilterBitsSet(int xid, int bits, bool isCommit); - - void printTransactionState(bool can_skip); - - void watchpointsFalsePositiveTrigger(); - void watchpointsTrueTrigger(); - - void profileTransactionLogOverflow(NodeID id, Address addr, Address pc); - void profileTransactionCacheOverflow(NodeID id, Address addr, Address pc); - void profileGetCPS(NodeID id, uint32 cps, Address pc); - void profileTransactionTCC(NodeID id, Address pc); - void profileTransactionUnsupInst(NodeID id, Address pc); - void profileTransactionSaveInst(NodeID id, Address pc); - void profileTransactionRestoreInst(NodeID id, Address pc); - - //---- end Transactional Memory CODE - - void rubyWatch(int proc); - bool watchAddress(Address addr); - - // return Ruby's start time - Time getRubyStartTime(){ - return m_ruby_start; - } - - // added for MemoryControl: - void profileMemReq(int bank); - void profileMemBankBusy(); - void profileMemBusBusy(); - void profileMemTfawBusy(); - void profileMemReadWriteBusy(); - void profileMemDataBusBusy(); - void profileMemRefresh(); - void profileMemRead(); - void profileMemWrite(); - void profileMemWaitCycles(int cycles); - void profileMemInputQ(int cycles); - void profileMemBankQ(int cycles); - void profileMemArbWait(int cycles); - void profileMemRandBusy(); - void profileMemNotOld(); + // Constructors + Profiler(); + + // Destructor + ~Profiler(); + + // Public Methods + void wakeup(); + + void setPeriodicStatsFile(const string& filename); + void setPeriodicStatsInterval(integer_t period); + + void printStats(ostream& out, bool short_stats=false); + void printShortStats(ostream& out) { printStats(out, true); } + void printTraceStats(ostream& out) const; + void clearStats(); + void printConfig(ostream& out) const; + void printResourceUsage(ostream& out) const; + + AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } + AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } + + void addPrimaryStatSample(const CacheMsg& msg, NodeID id); + void addSecondaryStatSample(GenericRequestType requestType, + AccessModeType type, int msgSize, + PrefetchBit pfBit, NodeID id); + void addSecondaryStatSample(CacheRequestType requestType, + AccessModeType type, int msgSize, + PrefetchBit pfBit, NodeID id); + void addAddressTraceSample(const CacheMsg& msg, NodeID id); + + void profileRequest(const string& requestStr); + void profileSharing(const Address& addr, AccessType type, + NodeID requestor, const Set& sharers, + const Set& owner); + + void profileMulticastRetry(const Address& addr, int count); + + void profileFilterAction(int action); + + void profileConflictingRequests(const Address& addr); + void profileOutstandingRequest(int outstanding) { + m_outstanding_requests.add(outstanding); + } + + void profileOutstandingPersistentRequest(int outstanding) { + m_outstanding_persistent_requests.add(outstanding); + } + void profileAverageLatencyEstimate(int latency) { + m_average_latency_estimate.add(latency); + } + + void countBAUnicast() { m_num_BA_unicasts++; } + void countBABroadcast() { m_num_BA_broadcasts++; } + + void recordPrediction(bool wasGood, bool wasPredicted); + + void startTransaction(int cpu); + void endTransaction(int cpu); + void profilePFWait(Time waitTime); + + void controllerBusy(MachineID machID); + void bankBusy(); + void missLatency(Time t, CacheRequestType type, + GenericMachineType respondingMach); + void swPrefetchLatency(Time t, CacheRequestType type, + GenericMachineType respondingMach); + void stopTableUsageSample(int num) { m_stopTableProfile.add(num); } + void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); } + void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); } + void sequencerRequests(int num) { m_sequencer_requests.add(num); } + void storeBuffer(int size, int blocks) { + m_store_buffer_size.add(size); + m_store_buffer_blocks.add(blocks); + } + + void profileGetXMaskPrediction(const Set& pred_set); + void profileGetSMaskPrediction(const Set& pred_set); + void profileTrainingMask(const Set& pred_set); + void profileTransition(const string& component, NodeID id, NodeID version, + Address addr, const string& state, + const string& event, const string& next_state, + const string& note); + void profileMsgDelay(int virtualNetwork, int delayCycles); + + void print(ostream& out) const; + + int64 getTotalInstructionsExecuted() const; + int64 getTotalTransactionsExecuted() const; + + Time getRubyStartTime(){ + return m_ruby_start; + } + + // added for MemoryControl: + void profileMemReq(int bank); + void profileMemBankBusy(); + void profileMemBusBusy(); + void profileMemTfawBusy(); + void profileMemReadWriteBusy(); + void profileMemDataBusBusy(); + void profileMemRefresh(); + void profileMemRead(); + void profileMemWrite(); + void profileMemWaitCycles(int cycles); + void profileMemInputQ(int cycles); + void profileMemBankQ(int cycles); + void profileMemArbWait(int cycles); + void profileMemRandBusy(); + void profileMemNotOld(); private: - // Private Methods - void addL2StatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); - void addL1DStatSample(const CacheMsg& msg, NodeID id); - void addL1IStatSample(const CacheMsg& msg, NodeID id); - - GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type); - - // Private copy constructor and assignment operator - Profiler(const Profiler& obj); - Profiler& operator=(const Profiler& obj); - - // Data Members (m_ prefix) - CacheProfiler* m_L1D_cache_profiler_ptr; - CacheProfiler* m_L1I_cache_profiler_ptr; - CacheProfiler* m_L2_cache_profiler_ptr; - AddressProfiler* m_address_profiler_ptr; - AddressProfiler* m_inst_profiler_ptr; - -// XactProfiler* m_xact_profiler_ptr; // gem5:Arka for decomissioning of log_tm - - Vector<int64> m_instructions_executed_at_start; - Vector<int64> m_cycles_executed_at_start; - - ostream* m_periodic_output_file_ptr; - integer_t m_stats_period; - std::fstream m_xact_visualizer; - std::ostream *m_xact_visualizer_ptr; - - Time m_ruby_start; - time_t m_real_time_start_time; - - int m_num_BA_unicasts; - int m_num_BA_broadcasts; - - Vector<integer_t> m_perProcTotalMisses; - Vector<integer_t> m_perProcUserMisses; - Vector<integer_t> m_perProcSupervisorMisses; - Vector<integer_t> m_perProcStartTransaction; - Vector<integer_t> m_perProcEndTransaction; - Vector < Vector < integer_t > > m_busyControllerCount; - integer_t m_busyBankCount; - Histogram m_multicast_retry_histogram; - - Histogram m_L1tbeProfile; - Histogram m_L2tbeProfile; - Histogram m_stopTableProfile; - - Histogram m_filter_action_histogram; - Histogram m_tbeProfile; - - Histogram m_sequencer_requests; - Histogram m_store_buffer_size; - Histogram m_store_buffer_blocks; - Histogram m_read_sharing_histogram; - Histogram m_write_sharing_histogram; - Histogram m_all_sharing_histogram; - int64 m_cache_to_cache; - int64 m_memory_to_cache; - - Histogram m_prefetchWaitHistogram; - - Vector<Histogram> m_missLatencyHistograms; - Vector<Histogram> m_machLatencyHistograms; - Histogram m_L2MissLatencyHistogram; - Histogram m_allMissLatencyHistogram; - - Histogram m_allSWPrefetchLatencyHistogram; - Histogram m_SWPrefetchL2MissLatencyHistogram; - Vector<Histogram> m_SWPrefetchLatencyHistograms; - Vector<Histogram> m_SWPrefetchMachLatencyHistograms; - - Histogram m_delayedCyclesHistogram; - Histogram m_delayedCyclesNonPFHistogram; - Vector<Histogram> m_delayedCyclesVCHistograms; - - int m_predictions; - int m_predictionOpportunities; - int m_goodPredictions; - - Histogram m_gets_mask_prediction; - Histogram m_getx_mask_prediction; - Histogram m_explicit_training_mask; - - // For profiling possibly conflicting requests - Map<Address, Time>* m_conflicting_map_ptr; - Histogram m_conflicting_histogram; - - Histogram m_outstanding_requests; - Histogram m_outstanding_persistent_requests; - - Histogram m_average_latency_estimate; - - //---- begin Transactional Memory CODE - Map <int, int>* m_procsInXactMap_ptr; - - Histogram m_xactCycles; - Histogram m_xactLogs; - Histogram m_xactReads; - Histogram m_xactWrites; - Histogram m_xactOverflowReads; - Histogram m_xactOverflowWrites; - Histogram m_xactOverflowTotalReads; - Histogram m_xactOverflowTotalWrites; - Histogram m_xactSizes; - Histogram m_xactRetries; - Histogram m_abortDelays; - Histogram m_xactLoadMisses; - Histogram m_xactStoreMisses; - Histogram m_xactInstrCount; - int m_xactNacked; - int m_transactionAborts; - int m_transWBs; - int m_extraWBs; - int m_abortStarupDelay; - int m_abortPerBlockDelay; - int m_inferredAborts; - Map <int, int>* m_nackXIDMap_ptr; - // pairs of XIDs involved in NACKs - Map<int, Map<int, int> * > * m_nackXIDPairMap_ptr; - Map <Address, int>* m_nackPCMap_ptr; - Map <int, int>* m_xactExceptionMap_ptr; - Map <int, int>* m_abortIDMap_ptr; - Map <int, int>* m_commitIDMap_ptr; - Map <int, int>* m_xactRetryIDMap_ptr; - Map <int, int>* m_xactCyclesIDMap_ptr; - Map <int, int>* m_xactReadSetIDMap_ptr; - Map <int, int>* m_xactWriteSetIDMap_ptr; - Map <int, int>* m_xactLoadMissIDMap_ptr; - Map <int, int>* m_xactStoreMissIDMap_ptr; - Map <int, integer_t> *m_xactInstrCountIDMap_ptr; - Map <Address, int>* m_abortPCMap_ptr; - Map <Address, int>* m_abortAddressMap_ptr; - Map <Address, int>* m_readSetMatch_ptr; - Map <Address, int>* m_readSetNoMatch_ptr; - Map <Address, int>* m_writeSetMatch_ptr; - Map <Address, int>* m_writeSetNoMatch_ptr; - Map <Address, int>* m_remoteReadSetMatch_ptr; - Map <Address, int>* m_remoteReadSetNoMatch_ptr; - Map <Address, int>* m_remoteWriteSetMatch_ptr; - Map <Address, int>* m_remoteWriteSetNoMatch_ptr; - long long int m_readSetEmptyChecks; - long long int m_readSetMatch; - long long int m_readSetNoMatch; - long long int m_writeSetEmptyChecks; - long long int m_writeSetMatch; - long long int m_writeSetNoMatch; - Map<int, Histogram> * m_xactReadFilterBitsSetOnCommit; - Map<int, Histogram> * m_xactReadFilterBitsSetOnAbort; - Map<int, Histogram> * m_xactWriteFilterBitsSetOnCommit; - Map<int, Histogram> * m_xactWriteFilterBitsSetOnAbort; - - unsigned int m_watchpointsFalsePositiveTrigger; - unsigned int m_watchpointsTrueTrigger; - - int m_transactionUnsupInsts; - int m_transactionSaveRestAborts; - - int m_transactionLogOverflows; - int m_transactionCacheOverflows; - - //---- end Transactional Memory CODE - - Map<Address, int>* m_watch_address_list_ptr; - // counts all initiated cache request including PUTs - int m_requests; - Map <string, int>* m_requestProfileMap_ptr; - - Time m_xact_visualizer_last; - - // added for MemoryControl: - long long int m_memReq; - long long int m_memBankBusy; - long long int m_memBusBusy; - long long int m_memTfawBusy; - long long int m_memReadWriteBusy; - long long int m_memDataBusBusy; - long long int m_memRefresh; - long long int m_memRead; - long long int m_memWrite; - long long int m_memWaitCycles; - long long int m_memInputQ; - long long int m_memBankQ; - long long int m_memArbWait; - long long int m_memRandBusy; - long long int m_memNotOld; - Vector<long long int> m_memBankCount; + // Private Methods + void addL2StatSample(GenericRequestType requestType, AccessModeType type, + int msgSize, PrefetchBit pfBit, NodeID id); + void addL1DStatSample(const CacheMsg& msg, NodeID id); + void addL1IStatSample(const CacheMsg& msg, NodeID id); + + GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type); + + // Private copy constructor and assignment operator + Profiler(const Profiler& obj); + Profiler& operator=(const Profiler& obj); + + // Data Members (m_ prefix) + CacheProfiler* m_L1D_cache_profiler_ptr; + CacheProfiler* m_L1I_cache_profiler_ptr; + CacheProfiler* m_L2_cache_profiler_ptr; + AddressProfiler* m_address_profiler_ptr; + AddressProfiler* m_inst_profiler_ptr; + + Vector<int64> m_instructions_executed_at_start; + Vector<int64> m_cycles_executed_at_start; + + ostream* m_periodic_output_file_ptr; + integer_t m_stats_period; + + Time m_ruby_start; + time_t m_real_time_start_time; + + int m_num_BA_unicasts; + int m_num_BA_broadcasts; + + Vector<integer_t> m_perProcTotalMisses; + Vector<integer_t> m_perProcUserMisses; + Vector<integer_t> m_perProcSupervisorMisses; + Vector<integer_t> m_perProcStartTransaction; + Vector<integer_t> m_perProcEndTransaction; + Vector < Vector < integer_t > > m_busyControllerCount; + integer_t m_busyBankCount; + Histogram m_multicast_retry_histogram; + + Histogram m_L1tbeProfile; + Histogram m_L2tbeProfile; + Histogram m_stopTableProfile; + + Histogram m_filter_action_histogram; + Histogram m_tbeProfile; + + Histogram m_sequencer_requests; + Histogram m_store_buffer_size; + Histogram m_store_buffer_blocks; + Histogram m_read_sharing_histogram; + Histogram m_write_sharing_histogram; + Histogram m_all_sharing_histogram; + int64 m_cache_to_cache; + int64 m_memory_to_cache; + + Histogram m_prefetchWaitHistogram; + + Vector<Histogram> m_missLatencyHistograms; + Vector<Histogram> m_machLatencyHistograms; + Histogram m_L2MissLatencyHistogram; + Histogram m_allMissLatencyHistogram; + + Histogram m_allSWPrefetchLatencyHistogram; + Histogram m_SWPrefetchL2MissLatencyHistogram; + Vector<Histogram> m_SWPrefetchLatencyHistograms; + Vector<Histogram> m_SWPrefetchMachLatencyHistograms; + + Histogram m_delayedCyclesHistogram; + Histogram m_delayedCyclesNonPFHistogram; + Vector<Histogram> m_delayedCyclesVCHistograms; + + int m_predictions; + int m_predictionOpportunities; + int m_goodPredictions; + + Histogram m_gets_mask_prediction; + Histogram m_getx_mask_prediction; + Histogram m_explicit_training_mask; + + // For profiling possibly conflicting requests + Map<Address, Time>* m_conflicting_map_ptr; + Histogram m_conflicting_histogram; + + Histogram m_outstanding_requests; + Histogram m_outstanding_persistent_requests; + + Histogram m_average_latency_estimate; + + Map<Address, int>* m_watch_address_list_ptr; + // counts all initiated cache request including PUTs + int m_requests; + Map <string, int>* m_requestProfileMap_ptr; + + // added for MemoryControl: + long long int m_memReq; + long long int m_memBankBusy; + long long int m_memBusBusy; + long long int m_memTfawBusy; + long long int m_memReadWriteBusy; + long long int m_memDataBusBusy; + long long int m_memRefresh; + long long int m_memRead; + long long int m_memWrite; + long long int m_memWaitCycles; + long long int m_memInputQ; + long long int m_memBankQ; + long long int m_memArbWait; + long long int m_memRandBusy; + long long int m_memNotOld; + Vector<long long int> m_memBankCount; }; @@ -439,9 +312,9 @@ ostream& operator<<(ostream& out, const Profiler& obj); extern inline ostream& operator<<(ostream& out, const Profiler& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + out << flush; + return out; } #endif //PROFILER_H |