/*
 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
   This file has been modified by Kevin Moore and Dan Nussbaum of the
   Scalable Systems Research Group at Sun Microsystems Laboratories
   (http://research.sun.com/scalable/) to support the Adaptive
   Transactional Memory Test Platform (ATMTP).

   Please send email to atmtp-interest@sun.com with feedback, questions, or
   to request future announcements about ATMTP.

   ----------------------------------------------------------------------

   File modification date: 2008-02-23

   ----------------------------------------------------------------------
*/

#include "mem/ruby/profiler/Profiler.hh"

#include <algorithm>
#include <fstream>
#include <numeric>
#include <string>

#include "base/stl_helpers.hh"
#include "base/str.hh"
#include "mem/protocol/MachineType.hh"
#include "mem/protocol/RubyRequest.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"

/**
 * the profiler uses GPUCoalescer code even though the GPUCoalescer is not
 * built for all ISAs, which can lead to run/link time errors. here we guard
 * the coalescer code with ifdefs as there is no easy way to refactor this
 * code without removing GPUCoalescer stats from the profiler.
 *
 * eventually we should use probe points here, but until then these ifdefs
 * will serve.
 */
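/*
 * Illustrative sketch of that probe-point approach (not wired up; the probe
 * name "CoalescerLatency" and the ppLatency member are assumptions, not
 * existing gem5 symbols):
 *
 *   // In the coalescer's regProbePoints():
 *   //     ppLatency = new ProbePointArg<Cycles>(getProbeManager(),
 *   //                                           "CoalescerLatency");
 *   // When a request completes:
 *   //     ppLatency->notify(latency);
 *   // A ProbeListenerArg registered by the object owning this profiler
 *   // would then feed each sample into the histograms registered below,
 *   // so this file would not need to reference GPUCoalescer at all.
 */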
#ifdef BUILD_GPU
#include "mem/ruby/system/GPUCoalescer.hh"
#endif
#include "mem/ruby/system/Sequencer.hh"

using namespace std;
using m5::stl_helpers::operator<<;

Profiler::Profiler(const RubySystemParams *p, RubySystem *rs)
    : m_ruby_system(rs), m_hot_lines(p->hot_lines),
      m_all_instructions(p->all_instructions),
      m_num_vnets(p->number_of_virtual_networks)
{
    m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this);
    m_address_profiler_ptr->setHotLines(m_hot_lines);
    m_address_profiler_ptr->setAllInstructions(m_all_instructions);

    // A second AddressProfiler tracks instruction fetches when profiling
    // all instructions.
    if (m_all_instructions) {
        m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this);
        m_inst_profiler_ptr->setHotLines(m_hot_lines);
        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
    }
}

Profiler::~Profiler()
{
}

void
Profiler::regStats(const std::string &pName)
{
    if (!m_all_instructions) {
        m_address_profiler_ptr->regStats(pName);
    }

    if (m_all_instructions) {
        m_inst_profiler_ptr->regStats(pName);
    }

    delayHistogram
        .init(10)
        .name(pName + ".delayHist")
        .desc("delay histogram for all messages")
        .flags(Stats::nozero | Stats::pdf | Stats::oneline);

    for (int i = 0; i < m_num_vnets; i++) {
        delayVCHistogram.push_back(new Stats::Histogram());
        delayVCHistogram[i]
            ->init(10)
            .name(pName + csprintf(".delayVCHist.vnet_%i", i))
            .desc(csprintf("delay histogram for vnet_%i", i))
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
    }

    m_outstandReqHistSeqr
        .init(10)
        .name(pName + ".outstanding_req_hist_seqr")
        .desc("")
        .flags(Stats::nozero | Stats::pdf | Stats::oneline);

    m_outstandReqHistCoalsr
        .init(10)
        .name(pName + ".outstanding_req_hist_coalsr")
        .desc("")
        .flags(Stats::nozero | Stats::pdf | Stats::oneline);

    m_latencyHistSeqr
        .init(10)
        .name(pName + ".latency_hist_seqr")
        .desc("")
        .flags(Stats::nozero | Stats::pdf | Stats::oneline);

    m_latencyHistCoalsr
        .init(10)
        .name(pName + ".latency_hist_coalsr")
        .desc("")
        .flags(Stats::nozero | Stats::pdf | Stats::oneline);

    m_hitLatencyHistSeqr
        .init(10)
        .name(pName + ".hit_latency_hist_seqr")
        .desc("")
        .flags(Stats::nozero | Stats::pdf | Stats::oneline);

    m_missLatencyHistSeqr
        .init(10)
        .name(pName + ".miss_latency_hist_seqr")
        .desc("")
        .flags(Stats::nozero | Stats::pdf | Stats::oneline);

    m_missLatencyHistCoalsr
        .init(10)
        .name(pName + ".miss_latency_hist_coalsr")
        .desc("")
        .flags(Stats::nozero | Stats::pdf | Stats::oneline);

    // Per-request-type latency histograms, kept separately for requests
    // issued through the Sequencer (seqr) and the GPUCoalescer (coalsr).
    for (int i = 0; i < RubyRequestType_NUM; i++) {
        m_typeLatencyHistSeqr.push_back(new Stats::Histogram());
        m_typeLatencyHistSeqr[i]
            ->init(10)
            .name(pName + csprintf(".%s.latency_hist_seqr",
                                   RubyRequestType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_typeLatencyHistCoalsr.push_back(new Stats::Histogram());
        m_typeLatencyHistCoalsr[i]
            ->init(10)
            .name(pName + csprintf(".%s.latency_hist_coalsr",
                                   RubyRequestType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram());
        m_hitTypeLatencyHistSeqr[i]
            ->init(10)
            .name(pName + csprintf(".%s.hit_latency_hist_seqr",
                                   RubyRequestType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram());
        m_missTypeLatencyHistSeqr[i]
            ->init(10)
            .name(pName + csprintf(".%s.miss_latency_hist_seqr",
                                   RubyRequestType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram());
        m_missTypeLatencyHistCoalsr[i]
            ->init(10)
            .name(pName + csprintf(".%s.miss_latency_hist_coalsr",
                                   RubyRequestType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf |
                   Stats::oneline);
    }

    // Per-machine-type histograms: hit/miss latencies plus the individual
    // components of a miss (issue, forward, first response, completion).
    for (int i = 0; i < MachineType_NUM; i++) {
        m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram());
        m_hitMachLatencyHistSeqr[i]
            ->init(10)
            .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr",
                                   MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_missMachLatencyHistSeqr.push_back(new Stats::Histogram());
        m_missMachLatencyHistSeqr[i]
            ->init(10)
            .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr",
                                   MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram());
        m_missMachLatencyHistCoalsr[i]
            ->init(10)
            .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr",
                                   MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram());
        m_IssueToInitialDelayHistSeqr[i]
            ->init(10)
            .name(pName + csprintf(
                ".%s.miss_latency_hist_seqr.issue_to_initial_request",
                MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram());
        m_IssueToInitialDelayHistCoalsr[i]
            ->init(10)
            .name(pName + csprintf(
                ".%s.miss_latency_hist_coalsr.issue_to_initial_request",
                MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram());
        m_InitialToForwardDelayHistSeqr[i]
            ->init(10)
            .name(pName + csprintf(
                ".%s.miss_latency_hist_seqr.initial_to_forward",
                MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram());
        m_InitialToForwardDelayHistCoalsr[i]
            ->init(10)
            .name(pName + csprintf(
                ".%s.miss_latency_hist_coalsr.initial_to_forward",
                MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram());
        m_ForwardToFirstResponseDelayHistSeqr[i]
            ->init(10)
            .name(pName + csprintf(
                ".%s.miss_latency_hist_seqr.forward_to_first_response",
                MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram());
        m_ForwardToFirstResponseDelayHistCoalsr[i]
            ->init(10)
            .name(pName + csprintf(
                ".%s.miss_latency_hist_coalsr.forward_to_first_response",
                MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram());
        m_FirstResponseToCompletionDelayHistSeqr[i]
            ->init(10)
            .name(pName + csprintf(
                ".%s.miss_latency_hist_seqr.first_response_to_completion",
                MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram());
        m_FirstResponseToCompletionDelayHistCoalsr[i]
            ->init(10)
            .name(pName + csprintf(
                ".%s.miss_latency_hist_coalsr.first_response_to_completion",
                MachineType(i)))
            .desc("")
            .flags(Stats::nozero | Stats::pdf | Stats::oneline);

        m_IncompleteTimesSeqr[i]
            .name(pName + csprintf(".%s.incomplete_times_seqr",
                                   MachineType(i)))
            .desc("")
            .flags(Stats::nozero);
    }

    // Per (request type, machine type) latency histograms.
    for (int i = 0; i < RubyRequestType_NUM; i++) {
        m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
        m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
        m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>());

        for (int j = 0; j < MachineType_NUM; j++) {
            m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
            m_hitTypeMachLatencyHistSeqr[i][j]
                ->init(10)
                .name(pName +
csprintf(".%s.%s.hit_type_mach_latency_hist_seqr", RubyRequestType(i), MachineType(j))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); m_missTypeMachLatencyHistSeqr[i][j] ->init(10) .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr", RubyRequestType(i), MachineType(j))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram()); m_missTypeMachLatencyHistCoalsr[i][j] ->init(10) .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr", RubyRequestType(i), MachineType(j))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); } } } void Profiler::collateStats() { if (!m_all_instructions) { m_address_profiler_ptr->collateStats(); } if (m_all_instructions) { m_inst_profiler_ptr->collateStats(); } for (uint32_t i = 0; i < MachineType_NUM; i++) { for (map::iterator it = m_ruby_system->m_abstract_controls[i].begin(); it != m_ruby_system->m_abstract_controls[i].end(); ++it) { AbstractController *ctr = (*it).second; delayHistogram.add(ctr->getDelayHist()); for (uint32_t i = 0; i < m_num_vnets; i++) { delayVCHistogram[i]->add(ctr->getDelayVCHist(i)); } } } for (uint32_t i = 0; i < MachineType_NUM; i++) { for (map::iterator it = m_ruby_system->m_abstract_controls[i].begin(); it != m_ruby_system->m_abstract_controls[i].end(); ++it) { AbstractController *ctr = (*it).second; Sequencer *seq = ctr->getCPUSequencer(); if (seq != NULL) { m_outstandReqHistSeqr.add(seq->getOutstandReqHist()); } #ifdef BUILD_GPU GPUCoalescer *coal = ctr->getGPUCoalescer(); if (coal != NULL) { m_outstandReqHistCoalsr.add(coal->getOutstandReqHist()); } #endif } } for (uint32_t i = 0; i < MachineType_NUM; i++) { for (map::iterator it = m_ruby_system->m_abstract_controls[i].begin(); it != m_ruby_system->m_abstract_controls[i].end(); ++it) { AbstractController *ctr = (*it).second; Sequencer *seq = ctr->getCPUSequencer(); if (seq != NULL) { // add all the latencies m_latencyHistSeqr.add(seq->getLatencyHist()); m_hitLatencyHistSeqr.add(seq->getHitLatencyHist()); m_missLatencyHistSeqr.add(seq->getMissLatencyHist()); // add the per request type latencies for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { m_typeLatencyHistSeqr[j] ->add(seq->getTypeLatencyHist(j)); m_hitTypeLatencyHistSeqr[j] ->add(seq->getHitTypeLatencyHist(j)); m_missTypeLatencyHistSeqr[j] ->add(seq->getMissTypeLatencyHist(j)); } // add the per machine type miss latencies for (uint32_t j = 0; j < MachineType_NUM; ++j) { m_hitMachLatencyHistSeqr[j] ->add(seq->getHitMachLatencyHist(j)); m_missMachLatencyHistSeqr[j] ->add(seq->getMissMachLatencyHist(j)); m_IssueToInitialDelayHistSeqr[j]->add( seq->getIssueToInitialDelayHist(MachineType(j))); m_InitialToForwardDelayHistSeqr[j]->add( seq->getInitialToForwardDelayHist(MachineType(j))); m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq-> getForwardRequestToFirstResponseHist(MachineType(j))); m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq-> getFirstResponseToCompletionDelayHist( MachineType(j))); m_IncompleteTimesSeqr[j] += seq->getIncompleteTimes(MachineType(j)); } // add the per (request, machine) type miss latencies for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { for (uint32_t k = 0; k < MachineType_NUM; k++) { m_hitTypeMachLatencyHistSeqr[j][k]->add( seq->getHitTypeMachLatencyHist(j,k)); m_missTypeMachLatencyHistSeqr[j][k]->add( seq->getMissTypeMachLatencyHist(j,k)); } } } #ifdef BUILD_GPU GPUCoalescer *coal = 
                ctr->getGPUCoalescer();
            if (coal != NULL) {
                // add all the latencies
                m_latencyHistCoalsr.add(coal->getLatencyHist());
                m_missLatencyHistCoalsr.add(coal->getMissLatencyHist());

                // add the per request type latencies
                for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
                    m_typeLatencyHistCoalsr[j]
                        ->add(coal->getTypeLatencyHist(j));
                    m_missTypeLatencyHistCoalsr[j]
                        ->add(coal->getMissTypeLatencyHist(j));
                }

                // add the per machine type miss latencies
                for (uint32_t j = 0; j < MachineType_NUM; ++j) {
                    m_missMachLatencyHistCoalsr[j]
                        ->add(coal->getMissMachLatencyHist(j));

                    m_IssueToInitialDelayHistCoalsr[j]->add(
                        coal->getIssueToInitialDelayHist(MachineType(j)));

                    m_InitialToForwardDelayHistCoalsr[j]->add(
                        coal->getInitialToForwardDelayHist(MachineType(j)));

                    m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal->
                        getForwardRequestToFirstResponseHist(MachineType(j)));

                    m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal->
                        getFirstResponseToCompletionDelayHist(
                            MachineType(j)));
                }

                // add the per (request, machine) type miss latencies
                for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
                    for (uint32_t k = 0; k < MachineType_NUM; k++) {
                        m_missTypeMachLatencyHistCoalsr[j][k]->add(
                            coal->getMissTypeMachLatencyHist(j,k));
                    }
                }
            }
#endif
        }
    }
}

void
Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
{
    if (msg.getType() != RubyRequestType_IFETCH) {
        // Note: The following line should be commented out if you
        // want to use the special profiling that is part of the GS320
        // protocol

        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
        // profiled by the AddressProfiler
        m_address_profiler_ptr->
            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
                           msg.getType(), msg.getAccessMode(), id, false);
    }
}