diff options
Diffstat (limited to 'src/mem/ruby/profiler/Profiler.cc')
-rw-r--r-- | src/mem/ruby/profiler/Profiler.cc | 577 |
1 files changed, 189 insertions, 388 deletions
diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index d0ea7921c..6f7da1eda 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,32 +61,21 @@ using namespace std; using m5::stl_helpers::operator<<; -Profiler::Profiler(const Params *p) - : SimObject(p) +Profiler::Profiler(const RubySystemParams *p) + : m_IncompleteTimes(MachineType_NUM) { - m_inst_profiler_ptr = NULL; - m_address_profiler_ptr = NULL; - m_real_time_start_time = time(NULL); // Not reset in clearStats() - m_hot_lines = p->hot_lines; m_all_instructions = p->all_instructions; - m_num_of_sequencers = p->num_of_sequencers; - - m_hot_lines = false; - m_all_instructions = false; - - m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers); m_address_profiler_ptr->setHotLines(m_hot_lines); m_address_profiler_ptr->setAllInstructions(m_all_instructions); if (m_all_instructions) { - m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers); m_inst_profiler_ptr->setHotLines(m_hot_lines); m_inst_profiler_ptr->setAllInstructions(m_all_instructions); } - - p->ruby_system->registerProfiler(this); } Profiler::~Profiler() @@ -94,74 +83,176 @@ Profiler::~Profiler() } void -Profiler::print(ostream& out) const +Profiler::regStats(const std::string &pName) { - out << "[Profiler]"; -} - -void -Profiler::printRequestProfile(ostream &out) const -{ - out << "Request vs. RubySystem State Profile" << endl; - out << "--------------------------------" << endl; - out << endl; + if (!m_all_instructions) { + m_address_profiler_ptr->regStats(pName); + } - map<string, uint64_t> m_requestProfileMap; - uint64_t m_requests = 0; + if (m_all_instructions) { + m_inst_profiler_ptr->regStats(pName); + } - for (uint32_t i = 0; i < MachineType_NUM; i++) { - for (map<uint32_t, AbstractController*>::iterator it = - g_abs_controls[i].begin(); - it != g_abs_controls[i].end(); ++it) { + delayHistogram + .init(10) + .name(pName + ".delayHist") + .desc("delay histogram for all message") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); - AbstractController *ctr = (*it).second; - map<string, uint64_t> mp = ctr->getRequestProfileMap(); + uint32_t numVNets = Network::getNumberOfVirtualNetworks(); + for (int i = 0; i < numVNets; i++) { + delayVCHistogram.push_back(new Stats::Histogram()); + delayVCHistogram[i] + ->init(10) + .name(pName + csprintf(".delayVCHist.vnet_%i", i)) + .desc(csprintf("delay histogram for vnet_%i", i)) + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + } - for (map<string, uint64_t>::iterator jt = mp.begin(); - jt != mp.end(); ++jt) { + m_outstandReqHist + .init(10) + .name(pName + ".outstanding_req_hist") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_latencyHist + .init(10) + .name(pName + ".latency_hist") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_hitLatencyHist + .init(10) + .name(pName + ".hit_latency_hist") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missLatencyHist + .init(10) + .name(pName + ".miss_latency_hist") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); - map<string, uint64_t>::iterator kt = - m_requestProfileMap.find((*jt).first); - if (kt != m_requestProfileMap.end()) { - (*kt).second += (*jt).second; - } else { - m_requestProfileMap[(*jt).first] = (*jt).second; - } - } + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_typeLatencyHist.push_back(new Stats::Histogram()); + m_typeLatencyHist[i] + ->init(10) + .name(pName + csprintf(".%s.latency_hist", + RubyRequestType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_hitTypeLatencyHist.push_back(new Stats::Histogram()); + m_hitTypeLatencyHist[i] + ->init(10) + .name(pName + csprintf(".%s.hit_latency_hist", + RubyRequestType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missTypeLatencyHist.push_back(new Stats::Histogram()); + m_missTypeLatencyHist[i] + ->init(10) + .name(pName + csprintf(".%s.miss_latency_hist", + RubyRequestType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + } - m_requests += ctr->getRequestCount(); - } + for (int i = 0; i < MachineType_NUM; i++) { + m_hitMachLatencyHist.push_back(new Stats::Histogram()); + m_hitMachLatencyHist[i] + ->init(10) + .name(pName + csprintf(".%s.hit_mach_latency_hist", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missMachLatencyHist.push_back(new Stats::Histogram()); + m_missMachLatencyHist[i] + ->init(10) + .name(pName + csprintf(".%s.miss_mach_latency_hist", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_IssueToInitialDelayHist.push_back(new Stats::Histogram()); + m_IssueToInitialDelayHist[i] + ->init(10) + .name(pName + csprintf( + ".%s.miss_latency_hist.issue_to_initial_request", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_InitialToForwardDelayHist.push_back(new Stats::Histogram()); + m_InitialToForwardDelayHist[i] + ->init(10) + .name(pName + csprintf(".%s.miss_latency_hist.initial_to_forward", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram()); + m_ForwardToFirstResponseDelayHist[i] + ->init(10) + .name(pName + csprintf( + ".%s.miss_latency_hist.forward_to_first_response", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram()); + m_FirstResponseToCompletionDelayHist[i] + ->init(10) + .name(pName + csprintf( + ".%s.miss_latency_hist.first_response_to_completion", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_IncompleteTimes[i] + .name(pName + csprintf(".%s.incomplete_times", MachineType(i))) + .desc("") + .flags(Stats::nozero); } - map<string, uint64_t>::const_iterator i = m_requestProfileMap.begin(); - map<string, uint64_t>::const_iterator end = m_requestProfileMap.end(); - for (; i != end; ++i) { - const string &key = i->first; - uint64_t count = i->second; - - double percent = (100.0 * double(count)) / double(m_requests); - vector<string> items; - tokenize(items, key, ':'); - vector<string>::iterator j = items.begin(); - vector<string>::iterator end = items.end(); - for (; j != end; ++i) - out << setw(10) << *j; - out << setw(11) << count; - out << setw(14) << percent << endl; + for (int i = 0; i < RubyRequestType_NUM; i++) { + m_hitTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>()); + m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>()); + + for (int j = 0; j < MachineType_NUM; j++) { + m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram()); + m_hitTypeMachLatencyHist[i][j] + ->init(10) + .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist", + RubyRequestType(i), MachineType(j))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram()); + m_missTypeMachLatencyHist[i][j] + ->init(10) + .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist", + RubyRequestType(i), MachineType(j))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + } } - out << endl; } void -Profiler::printDelayProfile(ostream &out) const +Profiler::collateStats() { - out << "Message Delayed Cycles" << endl; - out << "----------------------" << endl; + if (!m_all_instructions) { + m_address_profiler_ptr->collateStats(); + } - uint32_t numVNets = Network::getNumberOfVirtualNetworks(); - Histogram delayHistogram; - std::vector<Histogram> delayVCHistogram(numVNets); + if (m_all_instructions) { + m_inst_profiler_ptr->collateStats(); + } + uint32_t numVNets = Network::getNumberOfVirtualNetworks(); for (uint32_t i = 0; i < MachineType_NUM; i++) { for (map<uint32_t, AbstractController*>::iterator it = g_abs_controls[i].begin(); @@ -171,314 +262,81 @@ Profiler::printDelayProfile(ostream &out) const delayHistogram.add(ctr->getDelayHist()); for (uint32_t i = 0; i < numVNets; i++) { - delayVCHistogram[i].add(ctr->getDelayVCHist(i)); + delayVCHistogram[i]->add(ctr->getDelayVCHist(i)); } } } - out << "Total_delay_cycles: " << delayHistogram << endl; - - for (int i = 0; i < numVNets; i++) { - out << " virtual_network_" << i << "_delay_cycles: " - << delayVCHistogram[i] << endl; - } -} - -void -Profiler::printOutstandingReqProfile(ostream &out) const -{ - Histogram sequencerRequests; - for (uint32_t i = 0; i < MachineType_NUM; i++) { for (map<uint32_t, AbstractController*>::iterator it = - g_abs_controls[i].begin(); - it != g_abs_controls[i].end(); ++it) { + g_abs_controls[i].begin(); + it != g_abs_controls[i].end(); ++it) { AbstractController *ctr = (*it).second; Sequencer *seq = ctr->getSequencer(); if (seq != NULL) { - sequencerRequests.add(seq->getOutstandReqHist()); + m_outstandReqHist.add(seq->getOutstandReqHist()); } } } - out << "sequencer_requests_outstanding: " - << sequencerRequests << endl; -} - -void -Profiler::printMissLatencyProfile(ostream &out) const -{ - // Collate the miss latencies histograms from all the sequencers - Histogram latency_hist; - std::vector<Histogram> type_latency_hist(RubyRequestType_NUM); - - Histogram hit_latency_hist; - std::vector<Histogram> hit_type_latency_hist(RubyRequestType_NUM); - - std::vector<Histogram> hit_mach_latency_hist(MachineType_NUM); - std::vector<std::vector<Histogram> > - hit_type_mach_latency_hist(RubyRequestType_NUM, - std::vector<Histogram>(MachineType_NUM)); - - Histogram miss_latency_hist; - std::vector<Histogram> miss_type_latency_hist(RubyRequestType_NUM); - - std::vector<Histogram> miss_mach_latency_hist(MachineType_NUM); - std::vector<std::vector<Histogram> > - miss_type_mach_latency_hist(RubyRequestType_NUM, - std::vector<Histogram>(MachineType_NUM)); - - std::vector<Histogram> issue_to_initial_delay_hist(MachineType_NUM); - std::vector<Histogram> initial_to_forward_delay_hist(MachineType_NUM); - std::vector<Histogram> - forward_to_first_response_delay_hist(MachineType_NUM); - std::vector<Histogram> - first_response_to_completion_delay_hist(MachineType_NUM); - std::vector<uint64_t> incomplete_times(MachineType_NUM); - for (uint32_t i = 0; i < MachineType_NUM; i++) { for (map<uint32_t, AbstractController*>::iterator it = - g_abs_controls[i].begin(); - it != g_abs_controls[i].end(); ++it) { + g_abs_controls[i].begin(); + it != g_abs_controls[i].end(); ++it) { AbstractController *ctr = (*it).second; Sequencer *seq = ctr->getSequencer(); if (seq != NULL) { // add all the latencies - latency_hist.add(seq->getLatencyHist()); - hit_latency_hist.add(seq->getHitLatencyHist()); - miss_latency_hist.add(seq->getMissLatencyHist()); + m_latencyHist.add(seq->getLatencyHist()); + m_hitLatencyHist.add(seq->getHitLatencyHist()); + m_missLatencyHist.add(seq->getMissLatencyHist()); // add the per request type latencies for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { - type_latency_hist[j] - .add(seq->getTypeLatencyHist(j)); - hit_type_latency_hist[j] - .add(seq->getHitTypeLatencyHist(j)); - miss_type_latency_hist[j] - .add(seq->getMissTypeLatencyHist(j)); + m_typeLatencyHist[j] + ->add(seq->getTypeLatencyHist(j)); + m_hitTypeLatencyHist[j] + ->add(seq->getHitTypeLatencyHist(j)); + m_missTypeLatencyHist[j] + ->add(seq->getMissTypeLatencyHist(j)); } // add the per machine type miss latencies for (uint32_t j = 0; j < MachineType_NUM; ++j) { - hit_mach_latency_hist[j] - .add(seq->getHitMachLatencyHist(j)); - miss_mach_latency_hist[j] - .add(seq->getMissMachLatencyHist(j)); + m_hitMachLatencyHist[j] + ->add(seq->getHitMachLatencyHist(j)); + m_missMachLatencyHist[j] + ->add(seq->getMissMachLatencyHist(j)); - issue_to_initial_delay_hist[j].add( + m_IssueToInitialDelayHist[j]->add( seq->getIssueToInitialDelayHist(MachineType(j))); - initial_to_forward_delay_hist[j].add( + m_InitialToForwardDelayHist[j]->add( seq->getInitialToForwardDelayHist(MachineType(j))); - forward_to_first_response_delay_hist[j].add(seq-> + m_ForwardToFirstResponseDelayHist[j]->add(seq-> getForwardRequestToFirstResponseHist(MachineType(j))); - first_response_to_completion_delay_hist[j].add(seq-> - getFirstResponseToCompletionDelayHist(MachineType(j))); - incomplete_times[j] += + m_FirstResponseToCompletionDelayHist[j]->add(seq-> + getFirstResponseToCompletionDelayHist( + MachineType(j))); + m_IncompleteTimes[j] += seq->getIncompleteTimes(MachineType(j)); } // add the per (request, machine) type miss latencies for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { for (uint32_t k = 0; k < MachineType_NUM; k++) { - hit_type_mach_latency_hist[j][k].add( - seq->getHitTypeMachLatencyHist(j,k)); - miss_type_mach_latency_hist[j][k].add( - seq->getMissTypeMachLatencyHist(j,k)); + m_hitTypeMachLatencyHist[j][k]->add( + seq->getHitTypeMachLatencyHist(j,k)); + m_missTypeMachLatencyHist[j][k]->add( + seq->getMissTypeMachLatencyHist(j,k)); } } } } } - - out << "latency: " << latency_hist << endl; - for (int i = 0; i < RubyRequestType_NUM; i++) { - if (type_latency_hist[i].size() > 0) { - out << "latency: " << RubyRequestType(i) << ": " - << type_latency_hist[i] << endl; - } - } - - out << "hit latency: " << hit_latency_hist << endl; - for (int i = 0; i < RubyRequestType_NUM; i++) { - if (hit_type_latency_hist[i].size() > 0) { - out << "hit latency: " << RubyRequestType(i) << ": " - << hit_type_latency_hist[i] << endl; - } - } - - for (int i = 0; i < MachineType_NUM; i++) { - if (hit_mach_latency_hist[i].size() > 0) { - out << "hit latency: " << MachineType(i) << ": " - << hit_mach_latency_hist[i] << endl; - } - } - - for (int i = 0; i < RubyRequestType_NUM; i++) { - for (int j = 0; j < MachineType_NUM; j++) { - if (hit_type_mach_latency_hist[i][j].size() > 0) { - out << "hit latency: " << RubyRequestType(i) - << ": " << MachineType(j) << ": " - << hit_type_mach_latency_hist[i][j] << endl; - } - } - } - - out << "miss latency: " << miss_latency_hist << endl; - for (int i = 0; i < RubyRequestType_NUM; i++) { - if (miss_type_latency_hist[i].size() > 0) { - out << "miss latency: " << RubyRequestType(i) << ": " - << miss_type_latency_hist[i] << endl; - } - } - - for (int i = 0; i < MachineType_NUM; i++) { - if (miss_mach_latency_hist[i].size() > 0) { - out << "miss latency: " << MachineType(i) << ": " - << miss_mach_latency_hist[i] << endl; - - out << "miss latency: " << MachineType(i) - << "::issue_to_initial_request: " - << issue_to_initial_delay_hist[i] << endl; - out << "miss latency: " << MachineType(i) - << "::initial_to_forward_request: " - << initial_to_forward_delay_hist[i] << endl; - out << "miss latency: " << MachineType(i) - << "::forward_to_first_response: " - << forward_to_first_response_delay_hist[i] << endl; - out << "miss latency: " << MachineType(i) - << "::first_response_to_completion: " - << first_response_to_completion_delay_hist[i] << endl; - out << "incomplete times: " << incomplete_times[i] << endl; - } - } - - for (int i = 0; i < RubyRequestType_NUM; i++) { - for (int j = 0; j < MachineType_NUM; j++) { - if (miss_type_mach_latency_hist[i][j].size() > 0) { - out << "miss latency: " << RubyRequestType(i) - << ": " << MachineType(j) << ": " - << miss_type_mach_latency_hist[i][j] << endl; - } - } - } - - out << endl; -} - -void -Profiler::printStats(ostream& out, bool short_stats) -{ - out << endl; - if (short_stats) { - out << "SHORT "; - } - out << "Profiler Stats" << endl; - out << "--------------" << endl; - - Cycles ruby_cycles = g_system_ptr->curCycle()-m_ruby_start; - - out << "Ruby_current_time: " << g_system_ptr->curCycle() << endl; - out << "Ruby_start_time: " << m_ruby_start << endl; - out << "Ruby_cycles: " << ruby_cycles << endl; - out << endl; - - if (!short_stats) { - out << "Busy Controller Counts:" << endl; - for (uint32_t i = 0; i < MachineType_NUM; i++) { - uint32_t size = MachineType_base_count((MachineType)i); - - for (uint32_t j = 0; j < size; j++) { - MachineID machID; - machID.type = (MachineType)i; - machID.num = j; - - AbstractController *ctr = - (*(g_abs_controls[i].find(j))).second; - out << machID << ":" << ctr->getFullyBusyCycles() << " "; - if ((j + 1) % 8 == 0) { - out << endl; - } - } - out << endl; - } - out << endl; - - out << "Busy Bank Count:" << m_busyBankCount << endl; - out << endl; - - printOutstandingReqProfile(out); - out << endl; - } - - if (!short_stats) { - out << "All Non-Zero Cycle Demand Cache Accesses" << endl; - out << "----------------------------------------" << endl; - printMissLatencyProfile(out); - - if (m_all_sharing_histogram.size() > 0) { - out << "all_sharing: " << m_all_sharing_histogram << endl; - out << "read_sharing: " << m_read_sharing_histogram << endl; - out << "write_sharing: " << m_write_sharing_histogram << endl; - - out << "all_sharing_percent: "; - m_all_sharing_histogram.printPercent(out); - out << endl; - - out << "read_sharing_percent: "; - m_read_sharing_histogram.printPercent(out); - out << endl; - - out << "write_sharing_percent: "; - m_write_sharing_histogram.printPercent(out); - out << endl; - - int64 total_miss = m_cache_to_cache + m_memory_to_cache; - out << "all_misses: " << total_miss << endl; - out << "cache_to_cache_misses: " << m_cache_to_cache << endl; - out << "memory_to_cache_misses: " << m_memory_to_cache << endl; - out << "cache_to_cache_percent: " - << 100.0 * (double(m_cache_to_cache) / double(total_miss)) - << endl; - out << "memory_to_cache_percent: " - << 100.0 * (double(m_memory_to_cache) / double(total_miss)) - << endl; - out << endl; - } - - printRequestProfile(out); - - if (!m_all_instructions) { - m_address_profiler_ptr->printStats(out); - } - - if (m_all_instructions) { - m_inst_profiler_ptr->printStats(out); - } - - out << endl; - printDelayProfile(out); - } -} - -void -Profiler::clearStats() -{ - m_ruby_start = g_system_ptr->curCycle(); - m_real_time_start_time = time(NULL); - - m_busyBankCount = 0; - m_read_sharing_histogram.clear(); - m_write_sharing_histogram.clear(); - m_all_sharing_histogram.clear(); - m_cache_to_cache = 0; - m_memory_to_cache = 0; - - // update the start time - m_ruby_start = g_system_ptr->curCycle(); } void @@ -496,60 +354,3 @@ Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) msg.getType(), msg.getAccessMode(), id, false); } } - -void -Profiler::profileSharing(const Address& addr, AccessType type, - NodeID requestor, const Set& sharers, - const Set& owner) -{ - Set set_contacted(owner); - if (type == AccessType_Write) { - set_contacted.addSet(sharers); - } - set_contacted.remove(requestor); - int number_contacted = set_contacted.count(); - - if (type == AccessType_Write) { - m_write_sharing_histogram.add(number_contacted); - } else { - m_read_sharing_histogram.add(number_contacted); - } - m_all_sharing_histogram.add(number_contacted); - - if (number_contacted == 0) { - m_memory_to_cache++; - } else { - m_cache_to_cache++; - } -} - -void -Profiler::bankBusy() -{ - m_busyBankCount++; -} - -void -Profiler::rubyWatch(int id) -{ - uint64 tr = 0; - Address watch_address = Address(tr); - - DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr->curCycle(), id, - watch_address); - - // don't care about success or failure - m_watch_address_set.insert(watch_address); -} - -bool -Profiler::watchAddress(Address addr) -{ - return m_watch_address_set.count(addr) > 0; -} - -Profiler * -RubyProfilerParams::create() -{ - return new Profiler(this); -} |