author     Nilay Vaish <nilay@cs.wisc.edu>    2013-06-25 00:32:03 -0500
committer  Nilay Vaish <nilay@cs.wisc.edu>    2013-06-25 00:32:03 -0500
commit     beb6e57c6f6141ad959bb97b49daad7f1fa54af3 (patch)
tree       3221c7605fee55dc272fc4ba86ad384db9ad41ca /src/mem/ruby/profiler
parent     beee57070a1fecfe4b854af0c525b454a472202f (diff)
ruby: profiler: lots of inter-related changes
This patch started off by removing the global variables that the profiler used for profiling the miss latency of requests made to the cache. The corresponding histograms have been moved to the Sequencer; they are combined when the histograms are printed. Separate histograms are now maintained for tracking the latency of all requests together, of hits only, and of misses only.

A particular set of histograms used to use the type GenericMachineType, defined in one of the protocol files. This patch removes that type; everything that relied on it now uses MachineType instead. To make this possible, SLICC has been changed so that a controller can declare multiple machine types in its preamble.
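To make the new flow concrete, here is a minimal, self-contained sketch of the collation pattern the patch introduces: each Sequencer records hit and miss latencies locally, and the Profiler merges the per-Sequencer histograms only when the stats are printed. The Histogram and Sequencer classes below are toy stand-ins, not gem5's real ones; only the accessor names (getHitLatencyHist, getMissLatencyHist) and the merge-at-print-time structure are taken from the diff.

// Toy illustration of the collation pattern introduced by this patch.
// The Histogram here only tracks a sample count and sum; gem5's real
// Histogram keeps bucketed counts, but the merge-at-print flow is the same.

#include <cstdint>
#include <iostream>
#include <vector>

struct Histogram
{
    uint64_t count = 0;
    uint64_t sum = 0;

    // Record a single latency sample (in cycles).
    void add(uint64_t sample) { ++count; sum += sample; }

    // Merge another histogram into this one, as the Profiler does with the
    // per-Sequencer histograms at print time.
    void add(const Histogram &other) { count += other.count; sum += other.sum; }

    uint64_t size() const { return count; }
};

std::ostream &operator<<(std::ostream &os, const Histogram &h)
{
    return os << h.count << " samples, avg "
              << (h.count ? double(h.sum) / h.count : 0.0);
}

// Stand-in for the Sequencer side: after this patch the Sequencer records hit
// and miss latencies itself and exposes them through accessors like these.
struct Sequencer
{
    Histogram m_hitLatencyHist;
    Histogram m_missLatencyHist;

    void recordHit(uint64_t cycles)  { m_hitLatencyHist.add(cycles); }
    void recordMiss(uint64_t cycles) { m_missLatencyHist.add(cycles); }

    const Histogram &getHitLatencyHist() const  { return m_hitLatencyHist; }
    const Histogram &getMissLatencyHist() const { return m_missLatencyHist; }
};

int main()
{
    std::vector<Sequencer> seqs(2);
    seqs[0].recordHit(3);
    seqs[0].recordMiss(120);
    seqs[1].recordHit(5);

    // Profiler-style collation: build machine-wide histograms from the
    // per-Sequencer ones only when printing.
    Histogram hit_latency_hist, miss_latency_hist;
    for (const Sequencer &seq : seqs) {
        hit_latency_hist.add(seq.getHitLatencyHist());
        miss_latency_hist.add(seq.getMissLatencyHist());
    }

    std::cout << "hit latency: " << hit_latency_hist << std::endl;
    std::cout << "miss latency: " << miss_latency_hist << std::endl;
    return 0;
}

The actual printMissLatencyProfile() added in the diff below follows the same shape, but additionally splits the merged histograms by RubyRequestType and MachineType and prints the per-stage delay histograms and incomplete-request counts.
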
Diffstat (limited to 'src/mem/ruby/profiler')
-rw-r--r--  src/mem/ruby/profiler/Profiler.cc  343
-rw-r--r--  src/mem/ruby/profiler/Profiler.hh   37
2 files changed, 165 insertions, 215 deletions
diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc
index 76c4dba74..9a963684f 100644
--- a/src/mem/ruby/profiler/Profiler.cc
+++ b/src/mem/ruby/profiler/Profiler.cc
@@ -213,6 +213,169 @@ Profiler::printOutstandingReqProfile(ostream &out) const
}
void
+Profiler::printMissLatencyProfile(ostream &out) const
+{
+ // Collate the miss latencies histograms from all the sequencers
+ Histogram latency_hist;
+ std::vector<Histogram> type_latency_hist(RubyRequestType_NUM);
+
+ Histogram hit_latency_hist;
+ std::vector<Histogram> hit_type_latency_hist(RubyRequestType_NUM);
+
+ std::vector<Histogram> hit_mach_latency_hist(MachineType_NUM);
+ std::vector<std::vector<Histogram> >
+ hit_type_mach_latency_hist(RubyRequestType_NUM,
+ std::vector<Histogram>(MachineType_NUM));
+
+ Histogram miss_latency_hist;
+ std::vector<Histogram> miss_type_latency_hist(RubyRequestType_NUM);
+
+ std::vector<Histogram> miss_mach_latency_hist(MachineType_NUM);
+ std::vector<std::vector<Histogram> >
+ miss_type_mach_latency_hist(RubyRequestType_NUM,
+ std::vector<Histogram>(MachineType_NUM));
+
+ std::vector<Histogram> issue_to_initial_delay_hist(MachineType_NUM);
+ std::vector<Histogram> initial_to_forward_delay_hist(MachineType_NUM);
+ std::vector<Histogram>
+ forward_to_first_response_delay_hist(MachineType_NUM);
+ std::vector<Histogram>
+ first_response_to_completion_delay_hist(MachineType_NUM);
+ std::vector<uint64_t> incomplete_times(MachineType_NUM);
+
+ for (uint32_t i = 0; i < MachineType_NUM; i++) {
+ for (map<uint32_t, AbstractController*>::iterator it =
+ g_abs_controls[i].begin();
+ it != g_abs_controls[i].end(); ++it) {
+
+ AbstractController *ctr = (*it).second;
+ Sequencer *seq = ctr->getSequencer();
+ if (seq != NULL) {
+ // add all the latencies
+ latency_hist.add(seq->getLatencyHist());
+ hit_latency_hist.add(seq->getHitLatencyHist());
+ miss_latency_hist.add(seq->getMissLatencyHist());
+
+ // add the per request type latencies
+ for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
+ type_latency_hist[j]
+ .add(seq->getTypeLatencyHist(j));
+ hit_type_latency_hist[j]
+ .add(seq->getHitTypeLatencyHist(j));
+ miss_type_latency_hist[j]
+ .add(seq->getMissTypeLatencyHist(j));
+ }
+
+ // add the per machine type miss latencies
+ for (uint32_t j = 0; j < MachineType_NUM; ++j) {
+ hit_mach_latency_hist[j]
+ .add(seq->getHitMachLatencyHist(j));
+ miss_mach_latency_hist[j]
+ .add(seq->getMissMachLatencyHist(j));
+
+ issue_to_initial_delay_hist[j].add(
+ seq->getIssueToInitialDelayHist(MachineType(j)));
+
+ initial_to_forward_delay_hist[j].add(
+ seq->getInitialToForwardDelayHist(MachineType(j)));
+ forward_to_first_response_delay_hist[j].add(seq->
+ getForwardRequestToFirstResponseHist(MachineType(j)));
+
+ first_response_to_completion_delay_hist[j].add(seq->
+ getFirstResponseToCompletionDelayHist(MachineType(j)));
+ incomplete_times[j] +=
+ seq->getIncompleteTimes(MachineType(j));
+ }
+
+ // add the per (request, machine) type miss latencies
+ for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
+ for (uint32_t k = 0; k < MachineType_NUM; k++) {
+ hit_type_mach_latency_hist[j][k].add(
+ seq->getHitTypeMachLatencyHist(j,k));
+ miss_type_mach_latency_hist[j][k].add(
+ seq->getMissTypeMachLatencyHist(j,k));
+ }
+ }
+ }
+ }
+ }
+
+ out << "latency: " << latency_hist << endl;
+ for (int i = 0; i < RubyRequestType_NUM; i++) {
+ if (type_latency_hist[i].size() > 0) {
+ out << "latency: " << RubyRequestType(i) << ": "
+ << type_latency_hist[i] << endl;
+ }
+ }
+
+ out << "hit latency: " << hit_latency_hist << endl;
+ for (int i = 0; i < RubyRequestType_NUM; i++) {
+ if (hit_type_latency_hist[i].size() > 0) {
+ out << "hit latency: " << RubyRequestType(i) << ": "
+ << hit_type_latency_hist[i] << endl;
+ }
+ }
+
+ for (int i = 0; i < MachineType_NUM; i++) {
+ if (hit_mach_latency_hist[i].size() > 0) {
+ out << "hit latency: " << MachineType(i) << ": "
+ << hit_mach_latency_hist[i] << endl;
+ }
+ }
+
+ for (int i = 0; i < RubyRequestType_NUM; i++) {
+ for (int j = 0; j < MachineType_NUM; j++) {
+ if (hit_type_mach_latency_hist[i][j].size() > 0) {
+ out << "hit latency: " << RubyRequestType(i)
+ << ": " << MachineType(j) << ": "
+ << hit_type_mach_latency_hist[i][j] << endl;
+ }
+ }
+ }
+
+ out << "miss latency: " << miss_latency_hist << endl;
+ for (int i = 0; i < RubyRequestType_NUM; i++) {
+ if (miss_type_latency_hist[i].size() > 0) {
+ out << "miss latency: " << RubyRequestType(i) << ": "
+ << miss_type_latency_hist[i] << endl;
+ }
+ }
+
+ for (int i = 0; i < MachineType_NUM; i++) {
+ if (miss_mach_latency_hist[i].size() > 0) {
+ out << "miss latency: " << MachineType(i) << ": "
+ << miss_mach_latency_hist[i] << endl;
+
+ out << "miss latency: " << MachineType(i)
+ << "::issue_to_initial_request: "
+ << issue_to_initial_delay_hist[i] << endl;
+ out << "miss latency: " << MachineType(i)
+ << "::initial_to_forward_request: "
+ << initial_to_forward_delay_hist[i] << endl;
+ out << "miss latency: " << MachineType(i)
+ << "::forward_to_first_response: "
+ << forward_to_first_response_delay_hist[i] << endl;
+ out << "miss latency: " << MachineType(i)
+ << "::first_response_to_completion: "
+ << first_response_to_completion_delay_hist[i] << endl;
+ out << "incomplete times: " << incomplete_times[i] << endl;
+ }
+ }
+
+ for (int i = 0; i < RubyRequestType_NUM; i++) {
+ for (int j = 0; j < MachineType_NUM; j++) {
+ if (miss_type_mach_latency_hist[i][j].size() > 0) {
+ out << "miss latency: " << RubyRequestType(i)
+ << ": " << MachineType(j) << ": "
+ << miss_type_mach_latency_hist[i][j] << endl;
+ }
+ }
+ }
+
+ out << endl;
+}
+
+void
Profiler::printStats(ostream& out, bool short_stats)
{
out << endl;
@@ -296,68 +459,7 @@ Profiler::printStats(ostream& out, bool short_stats)
if (!short_stats) {
out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
out << "----------------------------------------" << endl;
- out << "miss_latency: " << m_allMissLatencyHistogram << endl;
- for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
- if (m_missLatencyHistograms[i].size() > 0) {
- out << "miss_latency_" << RubyRequestType(i) << ": "
- << m_missLatencyHistograms[i] << endl;
- }
- }
- for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
- if (m_machLatencyHistograms[i].size() > 0) {
- out << "miss_latency_" << GenericMachineType(i) << ": "
- << m_machLatencyHistograms[i] << endl;
- }
- }
-
- out << "miss_latency_wCC_issue_to_initial_request: "
- << m_wCCIssueToInitialRequestHistogram << endl;
- out << "miss_latency_wCC_initial_forward_request: "
- << m_wCCInitialRequestToForwardRequestHistogram << endl;
- out << "miss_latency_wCC_forward_to_first_response: "
- << m_wCCForwardRequestToFirstResponseHistogram << endl;
- out << "miss_latency_wCC_first_response_to_completion: "
- << m_wCCFirstResponseToCompleteHistogram << endl;
- out << "imcomplete_wCC_Times: " << m_wCCIncompleteTimes << endl;
- out << "miss_latency_dir_issue_to_initial_request: "
- << m_dirIssueToInitialRequestHistogram << endl;
- out << "miss_latency_dir_initial_forward_request: "
- << m_dirInitialRequestToForwardRequestHistogram << endl;
- out << "miss_latency_dir_forward_to_first_response: "
- << m_dirForwardRequestToFirstResponseHistogram << endl;
- out << "miss_latency_dir_first_response_to_completion: "
- << m_dirFirstResponseToCompleteHistogram << endl;
- out << "imcomplete_dir_Times: " << m_dirIncompleteTimes << endl;
-
- for (int i = 0; i < m_missMachLatencyHistograms.size(); i++) {
- for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
- if (m_missMachLatencyHistograms[i][j].size() > 0) {
- out << "miss_latency_" << RubyRequestType(i)
- << "_" << GenericMachineType(j) << ": "
- << m_missMachLatencyHistograms[i][j] << endl;
- }
- }
- }
-
- out << endl;
-
- out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
- out << "------------------------------------" << endl;
- out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
- for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
- if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
- out << "prefetch_latency_" << RubyRequestType(i) << ": "
- << m_SWPrefetchLatencyHistograms[i] << endl;
- }
- }
- for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
- if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
- out << "prefetch_latency_" << GenericMachineType(i) << ": "
- << m_SWPrefetchMachLatencyHistograms[i] << endl;
- }
- }
- out << "prefetch_latency_L2Miss:"
- << m_SWPrefetchL2MissLatencyHistogram << endl;
+ printMissLatencyProfile(out);
if (m_all_sharing_histogram.size() > 0) {
out << "all_sharing: " << m_all_sharing_histogram << endl;
@@ -434,44 +536,6 @@ Profiler::clearStats()
m_real_time_start_time = time(NULL);
m_busyBankCount = 0;
-
- m_missLatencyHistograms.resize(RubyRequestType_NUM);
- for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
- m_missLatencyHistograms[i].clear(200);
- }
- m_machLatencyHistograms.resize(GenericMachineType_NUM+1);
- for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
- m_machLatencyHistograms[i].clear(200);
- }
- m_missMachLatencyHistograms.resize(RubyRequestType_NUM);
- for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
- m_missMachLatencyHistograms[i].resize(GenericMachineType_NUM+1);
- for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
- m_missMachLatencyHistograms[i][j].clear(200);
- }
- }
- m_allMissLatencyHistogram.clear(200);
- m_wCCIssueToInitialRequestHistogram.clear(200);
- m_wCCInitialRequestToForwardRequestHistogram.clear(200);
- m_wCCForwardRequestToFirstResponseHistogram.clear(200);
- m_wCCFirstResponseToCompleteHistogram.clear(200);
- m_wCCIncompleteTimes = 0;
- m_dirIssueToInitialRequestHistogram.clear(200);
- m_dirInitialRequestToForwardRequestHistogram.clear(200);
- m_dirForwardRequestToFirstResponseHistogram.clear(200);
- m_dirFirstResponseToCompleteHistogram.clear(200);
- m_dirIncompleteTimes = 0;
-
- m_SWPrefetchLatencyHistograms.resize(RubyRequestType_NUM);
- for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
- m_SWPrefetchLatencyHistograms[i].clear(200);
- }
- m_SWPrefetchMachLatencyHistograms.resize(GenericMachineType_NUM+1);
- for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
- m_SWPrefetchMachLatencyHistograms[i].clear(200);
- }
- m_allSWPrefetchLatencyHistogram.clear(200);
-
m_read_sharing_histogram.clear();
m_write_sharing_histogram.clear();
m_all_sharing_histogram.clear();
@@ -530,85 +594,6 @@ Profiler::bankBusy()
m_busyBankCount++;
}
-// non-zero cycle demand request
-void
-Profiler::missLatency(Cycles cycles,
- RubyRequestType type,
- const GenericMachineType respondingMach)
-{
- m_allMissLatencyHistogram.add(cycles);
- m_missLatencyHistograms[type].add(cycles);
- m_machLatencyHistograms[respondingMach].add(cycles);
- m_missMachLatencyHistograms[type][respondingMach].add(cycles);
-}
-
-void
-Profiler::missLatencyWcc(Cycles issuedTime,
- Cycles initialRequestTime,
- Cycles forwardRequestTime,
- Cycles firstResponseTime,
- Cycles completionTime)
-{
- if ((issuedTime <= initialRequestTime) &&
- (initialRequestTime <= forwardRequestTime) &&
- (forwardRequestTime <= firstResponseTime) &&
- (firstResponseTime <= completionTime)) {
- m_wCCIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
-
- m_wCCInitialRequestToForwardRequestHistogram.add(forwardRequestTime -
- initialRequestTime);
-
- m_wCCForwardRequestToFirstResponseHistogram.add(firstResponseTime -
- forwardRequestTime);
-
- m_wCCFirstResponseToCompleteHistogram.add(completionTime -
- firstResponseTime);
- } else {
- m_wCCIncompleteTimes++;
- }
-}
-
-void
-Profiler::missLatencyDir(Cycles issuedTime,
- Cycles initialRequestTime,
- Cycles forwardRequestTime,
- Cycles firstResponseTime,
- Cycles completionTime)
-{
- if ((issuedTime <= initialRequestTime) &&
- (initialRequestTime <= forwardRequestTime) &&
- (forwardRequestTime <= firstResponseTime) &&
- (firstResponseTime <= completionTime)) {
- m_dirIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
-
- m_dirInitialRequestToForwardRequestHistogram.add(forwardRequestTime -
- initialRequestTime);
-
- m_dirForwardRequestToFirstResponseHistogram.add(firstResponseTime -
- forwardRequestTime);
-
- m_dirFirstResponseToCompleteHistogram.add(completionTime -
- firstResponseTime);
- } else {
- m_dirIncompleteTimes++;
- }
-}
-
-// non-zero cycle prefetch request
-void
-Profiler::swPrefetchLatency(Cycles cycles, RubyRequestType type,
- const GenericMachineType respondingMach)
-{
- m_allSWPrefetchLatencyHistogram.add(cycles);
- m_SWPrefetchLatencyHistograms[type].add(cycles);
- m_SWPrefetchMachLatencyHistograms[respondingMach].add(cycles);
-
- if (respondingMach == GenericMachineType_Directory ||
- respondingMach == GenericMachineType_NUM) {
- m_SWPrefetchL2MissLatencyHistogram.add(cycles);
- }
-}
-
// Helper function
static double
process_memory_total()
diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh
index 23efed67a..e7b3c5f8d 100644
--- a/src/mem/ruby/profiler/Profiler.hh
+++ b/src/mem/ruby/profiler/Profiler.hh
@@ -52,7 +52,6 @@
#include "base/hashmap.hh"
#include "mem/protocol/AccessType.hh"
-#include "mem/protocol/GenericMachineType.hh"
#include "mem/protocol/PrefetchBit.hh"
#include "mem/protocol/RubyAccessMode.hh"
#include "mem/protocol/RubyRequestType.hh"
@@ -110,21 +109,7 @@ class Profiler : public SimObject
void controllerBusy(MachineID machID);
void bankBusy();
-
- void missLatency(Cycles t, RubyRequestType type,
- const GenericMachineType respondingMach);
-
- void missLatencyWcc(Cycles issuedTime, Cycles initialRequestTime,
- Cycles forwardRequestTime, Cycles firstResponseTime,
- Cycles completionTime);
-
- void missLatencyDir(Cycles issuedTime, Cycles initialRequestTime,
- Cycles forwardRequestTime, Cycles firstResponseTime,
- Cycles completionTime);
- void swPrefetchLatency(Cycles t, RubyRequestType type,
- const GenericMachineType respondingMach);
-
void print(std::ostream& out) const;
void rubyWatch(int proc);
@@ -141,6 +126,7 @@ class Profiler : public SimObject
void printRequestProfile(std::ostream &out) const;
void printDelayProfile(std::ostream &out) const;
void printOutstandingReqProfile(std::ostream &out) const;
+ void printMissLatencyProfile(std::ostream &out) const;
private:
// Private copy constructor and assignment operator
@@ -161,27 +147,6 @@ class Profiler : public SimObject
int64 m_cache_to_cache;
int64 m_memory_to_cache;
- std::vector<Histogram> m_missLatencyHistograms;
- std::vector<Histogram> m_machLatencyHistograms;
- std::vector< std::vector<Histogram> > m_missMachLatencyHistograms;
- Histogram m_wCCIssueToInitialRequestHistogram;
- Histogram m_wCCInitialRequestToForwardRequestHistogram;
- Histogram m_wCCForwardRequestToFirstResponseHistogram;
- Histogram m_wCCFirstResponseToCompleteHistogram;
- int64 m_wCCIncompleteTimes;
- Histogram m_dirIssueToInitialRequestHistogram;
- Histogram m_dirInitialRequestToForwardRequestHistogram;
- Histogram m_dirForwardRequestToFirstResponseHistogram;
- Histogram m_dirFirstResponseToCompleteHistogram;
- int64 m_dirIncompleteTimes;
-
- Histogram m_allMissLatencyHistogram;
-
- Histogram m_allSWPrefetchLatencyHistogram;
- Histogram m_SWPrefetchL2MissLatencyHistogram;
- std::vector<Histogram> m_SWPrefetchLatencyHistograms;
- std::vector<Histogram> m_SWPrefetchMachLatencyHistograms;
-
Histogram m_average_latency_estimate;
m5::hash_set<Address> m_watch_address_set;