From 698866d46197ad062384894f1927f99f26b03f3b Mon Sep 17 00:00:00 2001 From: David Hashe Date: Mon, 20 Jul 2015 09:15:18 -0500 Subject: ruby: split CPU and GPU latency stats --- src/mem/ruby/profiler/Profiler.cc | 257 ++++++++++++++++----- src/mem/ruby/profiler/Profiler.hh | 43 ++-- src/mem/ruby/slicc_interface/AbstractController.cc | 1 + src/mem/ruby/slicc_interface/AbstractController.hh | 2 + 4 files changed, 225 insertions(+), 78 deletions(-) (limited to 'src/mem/ruby') diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index 7d3f20982..04e8331c4 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -42,6 +42,8 @@ ---------------------------------------------------------------------- */ +#include "mem/ruby/profiler/Profiler.hh" + #include #include @@ -54,7 +56,7 @@ #include "mem/protocol/RubyRequest.hh" #include "mem/ruby/network/Network.hh" #include "mem/ruby/profiler/AddressProfiler.hh" -#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/GPUCoalescer.hh" #include "mem/ruby/system/Sequencer.hh" using namespace std; @@ -106,131 +108,217 @@ Profiler::regStats(const std::string &pName) .flags(Stats::nozero | Stats::pdf | Stats::oneline); } - m_outstandReqHist + m_outstandReqHistSeqr + .init(10) + .name(pName + ".outstanding_req_hist_seqr") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_outstandReqHistCoalsr + .init(10) + .name(pName + ".outstanding_req_hist_coalsr") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_latencyHistSeqr .init(10) - .name(pName + ".outstanding_req_hist") + .name(pName + ".latency_hist_seqr") .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_latencyHist + m_latencyHistCoalsr .init(10) - .name(pName + ".latency_hist") + .name(pName + ".latency_hist_coalsr") .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_hitLatencyHist + m_hitLatencyHistSeqr .init(10) - .name(pName + 
".hit_latency_hist") + .name(pName + ".hit_latency_hist_seqr") .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_missLatencyHist + m_missLatencyHistSeqr .init(10) - .name(pName + ".miss_latency_hist") + .name(pName + ".miss_latency_hist_seqr") + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missLatencyHistCoalsr + .init(10) + .name(pName + ".miss_latency_hist_coalsr") .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); for (int i = 0; i < RubyRequestType_NUM; i++) { - m_typeLatencyHist.push_back(new Stats::Histogram()); - m_typeLatencyHist[i] + m_typeLatencyHistSeqr.push_back(new Stats::Histogram()); + m_typeLatencyHistSeqr[i] + ->init(10) + .name(pName + csprintf(".%s.latency_hist_seqr", + RubyRequestType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_typeLatencyHistCoalsr.push_back(new Stats::Histogram()); + m_typeLatencyHistCoalsr[i] ->init(10) - .name(pName + csprintf(".%s.latency_hist", + .name(pName + csprintf(".%s.latency_hist_coalsr", RubyRequestType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_hitTypeLatencyHist.push_back(new Stats::Histogram()); - m_hitTypeLatencyHist[i] + m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram()); + m_hitTypeLatencyHistSeqr[i] ->init(10) - .name(pName + csprintf(".%s.hit_latency_hist", + .name(pName + csprintf(".%s.hit_latency_hist_seqr", RubyRequestType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_missTypeLatencyHist.push_back(new Stats::Histogram()); - m_missTypeLatencyHist[i] + m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram()); + m_missTypeLatencyHistSeqr[i] ->init(10) - .name(pName + csprintf(".%s.miss_latency_hist", + .name(pName + csprintf(".%s.miss_latency_hist_seqr", + RubyRequestType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram()); + m_missTypeLatencyHistCoalsr[i] + ->init(10) + 
.name(pName + csprintf(".%s.miss_latency_hist_coalsr", RubyRequestType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); } for (int i = 0; i < MachineType_NUM; i++) { - m_hitMachLatencyHist.push_back(new Stats::Histogram()); - m_hitMachLatencyHist[i] + m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram()); + m_hitMachLatencyHistSeqr[i] + ->init(10) + .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missMachLatencyHistSeqr.push_back(new Stats::Histogram()); + m_missMachLatencyHistSeqr[i] ->init(10) - .name(pName + csprintf(".%s.hit_mach_latency_hist", + .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_missMachLatencyHist.push_back(new Stats::Histogram()); - m_missMachLatencyHist[i] + m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram()); + m_missMachLatencyHistCoalsr[i] ->init(10) - .name(pName + csprintf(".%s.miss_mach_latency_hist", + .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_IssueToInitialDelayHist.push_back(new Stats::Histogram()); - m_IssueToInitialDelayHist[i] + m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram()); + m_IssueToInitialDelayHistSeqr[i] + ->init(10) + .name(pName + csprintf( + ".%s.miss_latency_hist_seqr.issue_to_initial_request", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram()); + m_IssueToInitialDelayHistCoalsr[i] ->init(10) .name(pName + csprintf( - ".%s.miss_latency_hist.issue_to_initial_request", + ".%s.miss_latency_hist_coalsr.issue_to_initial_request", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_InitialToForwardDelayHist.push_back(new Stats::Histogram()); - 
m_InitialToForwardDelayHist[i] + m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram()); + m_InitialToForwardDelayHistSeqr[i] + ->init(10) + .name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram()); + m_InitialToForwardDelayHistCoalsr[i] ->init(10) - .name(pName + csprintf(".%s.miss_latency_hist.initial_to_forward", + .name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram()); - m_ForwardToFirstResponseDelayHist[i] + m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram()); + m_ForwardToFirstResponseDelayHistSeqr[i] ->init(10) .name(pName + csprintf( - ".%s.miss_latency_hist.forward_to_first_response", + ".%s.miss_latency_hist_seqr.forward_to_first_response", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram()); - m_FirstResponseToCompletionDelayHist[i] + m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram()); + m_ForwardToFirstResponseDelayHistCoalsr[i] ->init(10) .name(pName + csprintf( - ".%s.miss_latency_hist.first_response_to_completion", + ".%s.miss_latency_hist_coalsr.forward_to_first_response", MachineType(i))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_IncompleteTimes[i] - .name(pName + csprintf(".%s.incomplete_times", MachineType(i))) + m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram()); + m_FirstResponseToCompletionDelayHistSeqr[i] + ->init(10) + .name(pName + csprintf( + ".%s.miss_latency_hist_seqr.first_response_to_completion", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + 
m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram()); + m_FirstResponseToCompletionDelayHistCoalsr[i] + ->init(10) + .name(pName + csprintf( + ".%s.miss_latency_hist_coalsr.first_response_to_completion", + MachineType(i))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_IncompleteTimesSeqr[i] + .name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i))) .desc("") .flags(Stats::nozero); } for (int i = 0; i < RubyRequestType_NUM; i++) { - m_hitTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>()); - m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>()); + m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>()); + m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>()); + m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>()); for (int j = 0; j < MachineType_NUM; j++) { - m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram()); - m_hitTypeMachLatencyHist[i][j] + m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); + m_hitTypeMachLatencyHistSeqr[i][j] + ->init(10) + .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr", + RubyRequestType(i), MachineType(j))) + .desc("") + .flags(Stats::nozero | Stats::pdf | Stats::oneline); + + m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); + m_missTypeMachLatencyHistSeqr[i][j] ->init(10) - .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist", + .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr", RubyRequestType(i), MachineType(j))) .desc("") .flags(Stats::nozero | Stats::pdf | Stats::oneline); - m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram()); - m_missTypeMachLatencyHist[i][j] + m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram()); + m_missTypeMachLatencyHistCoalsr[i][j] ->init(10) - .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist", + .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr", RubyRequestType(i), MachineType(j))) .desc("") .flags(Stats::nozero | 
Stats::pdf | Stats::oneline); @@ -271,7 +359,11 @@ Profiler::collateStats() AbstractController *ctr = (*it).second; Sequencer *seq = ctr->getCPUSequencer(); if (seq != NULL) { - m_outstandReqHist.add(seq->getOutstandReqHist()); + m_outstandReqHistSeqr.add(seq->getOutstandReqHist()); + } + GPUCoalescer *coal = ctr->getGPUCoalescer(); + if (coal != NULL) { + m_outstandReqHistCoalsr.add(coal->getOutstandReqHist()); } } } @@ -285,52 +377,93 @@ Profiler::collateStats() Sequencer *seq = ctr->getCPUSequencer(); if (seq != NULL) { // add all the latencies - m_latencyHist.add(seq->getLatencyHist()); - m_hitLatencyHist.add(seq->getHitLatencyHist()); - m_missLatencyHist.add(seq->getMissLatencyHist()); + m_latencyHistSeqr.add(seq->getLatencyHist()); + m_hitLatencyHistSeqr.add(seq->getHitLatencyHist()); + m_missLatencyHistSeqr.add(seq->getMissLatencyHist()); // add the per request type latencies for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { - m_typeLatencyHist[j] + m_typeLatencyHistSeqr[j] ->add(seq->getTypeLatencyHist(j)); - m_hitTypeLatencyHist[j] + m_hitTypeLatencyHistSeqr[j] ->add(seq->getHitTypeLatencyHist(j)); - m_missTypeLatencyHist[j] + m_missTypeLatencyHistSeqr[j] ->add(seq->getMissTypeLatencyHist(j)); } // add the per machine type miss latencies for (uint32_t j = 0; j < MachineType_NUM; ++j) { - m_hitMachLatencyHist[j] + m_hitMachLatencyHistSeqr[j] ->add(seq->getHitMachLatencyHist(j)); - m_missMachLatencyHist[j] + m_missMachLatencyHistSeqr[j] ->add(seq->getMissMachLatencyHist(j)); - m_IssueToInitialDelayHist[j]->add( + m_IssueToInitialDelayHistSeqr[j]->add( seq->getIssueToInitialDelayHist(MachineType(j))); - m_InitialToForwardDelayHist[j]->add( + m_InitialToForwardDelayHistSeqr[j]->add( seq->getInitialToForwardDelayHist(MachineType(j))); - m_ForwardToFirstResponseDelayHist[j]->add(seq-> + m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq-> getForwardRequestToFirstResponseHist(MachineType(j))); - m_FirstResponseToCompletionDelayHist[j]->add(seq-> + 
m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq-> getFirstResponseToCompletionDelayHist( MachineType(j))); - m_IncompleteTimes[j] += + m_IncompleteTimesSeqr[j] += seq->getIncompleteTimes(MachineType(j)); } // add the per (request, machine) type miss latencies for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { for (uint32_t k = 0; k < MachineType_NUM; k++) { - m_hitTypeMachLatencyHist[j][k]->add( + m_hitTypeMachLatencyHistSeqr[j][k]->add( seq->getHitTypeMachLatencyHist(j,k)); - m_missTypeMachLatencyHist[j][k]->add( + m_missTypeMachLatencyHistSeqr[j][k]->add( seq->getMissTypeMachLatencyHist(j,k)); } } } + + GPUCoalescer *coal = ctr->getGPUCoalescer(); + if (coal != NULL) { + // add all the latencies + m_latencyHistCoalsr.add(coal->getLatencyHist()); + m_missLatencyHistCoalsr.add(coal->getMissLatencyHist()); + + // add the per request type latencies + for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { + m_typeLatencyHistCoalsr[j] + ->add(coal->getTypeLatencyHist(j)); + m_missTypeLatencyHistCoalsr[j] + ->add(coal->getMissTypeLatencyHist(j)); + } + + // add the per machine type miss latencies + for (uint32_t j = 0; j < MachineType_NUM; ++j) { + m_missMachLatencyHistCoalsr[j] + ->add(coal->getMissMachLatencyHist(j)); + + m_IssueToInitialDelayHistCoalsr[j]->add( + coal->getIssueToInitialDelayHist(MachineType(j))); + + m_InitialToForwardDelayHistCoalsr[j]->add( + coal->getInitialToForwardDelayHist(MachineType(j))); + m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal-> + getForwardRequestToFirstResponseHist(MachineType(j))); + + m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal-> + getFirstResponseToCompletionDelayHist( + MachineType(j))); + } + + // add the per (request, machine) type miss latencies + for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { + for (uint32_t k = 0; k < MachineType_NUM; k++) { + m_missTypeMachLatencyHistCoalsr[j][k]->add( + coal->getMissTypeMachLatencyHist(j,k)); + } + } + } } } } diff --git a/src/mem/ruby/profiler/Profiler.hh 
b/src/mem/ruby/profiler/Profiler.hh index 5be75fb65..6ad65f962 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -94,38 +94,49 @@ class Profiler std::vector delayVCHistogram; //! Histogram for number of outstanding requests per cycle. - Stats::Histogram m_outstandReqHist; + Stats::Histogram m_outstandReqHistSeqr; + Stats::Histogram m_outstandReqHistCoalsr; //! Histogram for holding latency profile of all requests. - Stats::Histogram m_latencyHist; - std::vector<Stats::Histogram *> m_typeLatencyHist; + Stats::Histogram m_latencyHistSeqr; + Stats::Histogram m_latencyHistCoalsr; + std::vector<Stats::Histogram *> m_typeLatencyHistSeqr; + std::vector<Stats::Histogram *> m_typeLatencyHistCoalsr; //! Histogram for holding latency profile of all requests that //! hit in the controller connected to this sequencer. - Stats::Histogram m_hitLatencyHist; - std::vector<Stats::Histogram *> m_hitTypeLatencyHist; + Stats::Histogram m_hitLatencyHistSeqr; + std::vector<Stats::Histogram *> m_hitTypeLatencyHistSeqr; //! Histograms for profiling the latencies for requests that //! did not required external messages. - std::vector<Stats::Histogram *> m_hitMachLatencyHist; - std::vector< std::vector<Stats::Histogram *> > m_hitTypeMachLatencyHist; + std::vector<Stats::Histogram *> m_hitMachLatencyHistSeqr; + std::vector< std::vector<Stats::Histogram *> > m_hitTypeMachLatencyHistSeqr; //! Histogram for holding latency profile of all requests that //! miss in the controller connected to this sequencer. - Stats::Histogram m_missLatencyHist; - std::vector<Stats::Histogram *> m_missTypeLatencyHist; + Stats::Histogram m_missLatencyHistSeqr; + Stats::Histogram m_missLatencyHistCoalsr; + std::vector<Stats::Histogram *> m_missTypeLatencyHistSeqr; + std::vector<Stats::Histogram *> m_missTypeLatencyHistCoalsr; //! Histograms for profiling the latencies for requests that //! required external messages. 
- std::vector<Stats::Histogram *> m_missMachLatencyHist; - std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist; + std::vector<Stats::Histogram *> m_missMachLatencyHistSeqr; + std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHistSeqr; + std::vector<Stats::Histogram *> m_missMachLatencyHistCoalsr; + std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHistCoalsr; //! Histograms for recording the breakdown of miss latency - std::vector<Stats::Histogram *> m_IssueToInitialDelayHist; - std::vector<Stats::Histogram *> m_InitialToForwardDelayHist; - std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist; - std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist; - Stats::Scalar m_IncompleteTimes[MachineType_NUM]; + std::vector<Stats::Histogram *> m_IssueToInitialDelayHistSeqr; + std::vector<Stats::Histogram *> m_InitialToForwardDelayHistSeqr; + std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHistSeqr; + std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHistSeqr; + Stats::Scalar m_IncompleteTimesSeqr[MachineType_NUM]; + std::vector<Stats::Histogram *> m_IssueToInitialDelayHistCoalsr; + std::vector<Stats::Histogram *> m_InitialToForwardDelayHistCoalsr; + std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHistCoalsr; + std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHistCoalsr; //added by SS const bool m_hot_lines; diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 458fde5bc..669fb30fb 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -32,6 +32,7 @@ #include "mem/protocol/MemoryMsg.hh" #include "mem/ruby/system/RubySystem.hh" #include "mem/ruby/system/Sequencer.hh" +#include "mem/ruby/system/GPUCoalescer.hh" #include "sim/system.hh" AbstractController::AbstractController(const Params *p) diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index 4488ee3f4..cfd11b8eb 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -49,6 +49,7 @@ #include "mem/mem_object.hh" class Network; +class GPUCoalescer; // used to communicate 
that an in_port peeked the wrong message type class RejectException: public std::exception @@ -86,6 +87,7 @@ class AbstractController : public MemObject, public Consumer virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0; virtual Sequencer* getCPUSequencer() const = 0; + virtual GPUCoalescer* getGPUCoalescer() const = 0; //! These functions are used by ruby system to read/write the data blocks //! that exist with in the controller. -- cgit v1.2.3