summaryrefslogtreecommitdiff
path: root/src/mem/ruby
diff options
context:
space:
mode:
Diffstat (limited to 'src/mem/ruby')
-rw-r--r--src/mem/ruby/common/Histogram.cc97
-rw-r--r--src/mem/ruby/common/Histogram.hh30
-rw-r--r--src/mem/ruby/network/Network.hh2
-rw-r--r--src/mem/ruby/profiler/Profiler.cc62
-rw-r--r--src/mem/ruby/profiler/Profiler.hh7
-rw-r--r--src/mem/ruby/slicc_interface/AbstractController.cc16
-rw-r--r--src/mem/ruby/slicc_interface/AbstractController.hh12
-rw-r--r--src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc6
-rw-r--r--src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh1
9 files changed, 147 insertions, 86 deletions
diff --git a/src/mem/ruby/common/Histogram.cc b/src/mem/ruby/common/Histogram.cc
index dcb723f1b..0558e5198 100644
--- a/src/mem/ruby/common/Histogram.cc
+++ b/src/mem/ruby/common/Histogram.cc
@@ -34,11 +34,10 @@
using namespace std;
-Histogram::Histogram(int binsize, int bins)
+Histogram::Histogram(int binsize, uint32_t bins)
{
m_binsize = binsize;
- m_bins = bins;
- clear();
+ clear(bins);
}
Histogram::~Histogram()
@@ -46,29 +45,43 @@ Histogram::~Histogram()
}
void
-Histogram::clear(int binsize, int bins)
+Histogram::clear(int binsize, uint32_t bins)
{
m_binsize = binsize;
clear(bins);
}
void
-Histogram::clear(int bins)
+Histogram::clear(uint32_t bins)
{
- m_bins = bins;
m_largest_bin = 0;
m_max = 0;
- m_data.resize(m_bins);
- for (int i = 0; i < m_bins; i++) {
+ m_data.resize(bins);
+ for (uint32_t i = 0; i < bins; i++) {
m_data[i] = 0;
}
+
m_count = 0;
m_max = 0;
-
m_sumSamples = 0;
m_sumSquaredSamples = 0;
}
+void
+Histogram::doubleBinSize()
+{
+ assert(m_binsize != -1);
+ uint32_t t_bins = m_data.size();
+
+ for (uint32_t i = 0; i < t_bins/2; i++) {
+ m_data[i] = m_data[i*2] + m_data[i*2 + 1];
+ }
+ for (uint32_t i = t_bins/2; i < t_bins; i++) {
+ m_data[i] = 0;
+ }
+
+ m_binsize *= 2;
+}
void
Histogram::add(int64 value)
@@ -80,7 +93,8 @@ Histogram::add(int64 value)
m_sumSamples += value;
m_sumSquaredSamples += (value*value);
- int index;
+ uint32_t index;
+
if (m_binsize == -1) {
// This is a log base 2 histogram
if (value == 0) {
@@ -93,37 +107,59 @@ Histogram::add(int64 value)
}
} else {
// This is a linear histogram
- while (m_max >= (m_bins * m_binsize)) {
- for (int i = 0; i < m_bins/2; i++) {
- m_data[i] = m_data[i*2] + m_data[i*2 + 1];
- }
- for (int i = m_bins/2; i < m_bins; i++) {
- m_data[i] = 0;
- }
- m_binsize *= 2;
- }
+ uint32_t t_bins = m_data.size();
+
+ while (m_max >= (t_bins * m_binsize)) doubleBinSize();
index = value/m_binsize;
}
- assert(index >= 0);
+
+ assert(index < m_data.size());
m_data[index]++;
m_largest_bin = max(m_largest_bin, index);
}
void
-Histogram::add(const Histogram& hist)
+Histogram::add(Histogram& hist)
{
- assert(hist.getBins() == m_bins);
- assert(hist.getBinSize() == -1); // assume log histogram
- assert(m_binsize == -1);
+ uint32_t t_bins = m_data.size();
- for (int j = 0; j < hist.getData(0); j++) {
- add(0);
+ if (hist.getBins() != t_bins) {
+ fatal("Histograms with different number of bins cannot be combined!");
}
- for (int i = 1; i < m_bins; i++) {
- for (int j = 0; j < hist.getData(i); j++) {
- add(1<<(i-1)); // account for the + 1 index
+ m_max = max(m_max, hist.getMax());
+ m_count += hist.size();
+ m_sumSamples += hist.getTotal();
+ m_sumSquaredSamples += hist.getSquaredTotal();
+
+ // Both histograms are log base 2.
+ if (hist.getBinSize() == -1 && m_binsize == -1) {
+ for (int j = 0; j < hist.getData(0); j++) {
+ add(0);
+ }
+
+ for (uint32_t i = 1; i < t_bins; i++) {
+ for (int j = 0; j < hist.getData(i); j++) {
+ add(1<<(i-1)); // account for the + 1 index
+ }
+ }
+ } else if (hist.getBinSize() >= 1 && m_binsize >= 1) {
+ // Both the histogram are linear.
+ // We are assuming that the two histograms have the same
+ // minimum value that they can store.
+
+ while (m_binsize > hist.getBinSize()) hist.doubleBinSize();
+ while (hist.getBinSize() > m_binsize) doubleBinSize();
+
+ assert(m_binsize == hist.getBinSize());
+
+ for (uint32_t i = 0; i < t_bins; i++) {
+ m_data[i] += hist.getData(i);
+
+ if (m_data[i] > 0) m_largest_bin = i;
}
+ } else {
+ fatal("Don't know how to combine log and linear histograms!");
}
}
@@ -177,7 +213,8 @@ Histogram::printWithMultiplier(ostream& out, double multiplier) const
<< " | ";
out << "standard deviation: " << getStandardDeviation() << " |";
}
- for (int i = 0; i < m_bins && i <= m_largest_bin; i++) {
+
+ for (uint32_t i = 0; i <= m_largest_bin; i++) {
if (multiplier == 1.0) {
out << " " << m_data[i];
} else {
diff --git a/src/mem/ruby/common/Histogram.hh b/src/mem/ruby/common/Histogram.hh
index bfc0e4293..c34e39af1 100644
--- a/src/mem/ruby/common/Histogram.hh
+++ b/src/mem/ruby/common/Histogram.hh
@@ -37,34 +37,38 @@
class Histogram
{
public:
- Histogram(int binsize = 1, int bins = 50);
+ Histogram(int binsize = 1, uint32_t bins = 50);
~Histogram();
void add(int64 value);
- void add(const Histogram& hist);
- void clear() { clear(m_bins); }
- void clear(int bins);
- void clear(int binsize, int bins);
- int64 size() const { return m_count; }
- int getBins() const { return m_bins; }
+ void add(Histogram& hist);
+ void doubleBinSize();
+
+ void clear() { clear(m_data.size()); }
+ void clear(uint32_t bins);
+ void clear(int binsize, uint32_t bins);
+
+ uint64_t size() const { return m_count; }
+ uint32_t getBins() const { return m_data.size(); }
int getBinSize() const { return m_binsize; }
int64 getTotal() const { return m_sumSamples; }
- int64 getData(int index) const { return m_data[index]; }
+ uint64_t getSquaredTotal() const { return m_sumSquaredSamples; }
+ uint64_t getData(int index) const { return m_data[index]; }
+ int64 getMax() const { return m_max; }
void printWithMultiplier(std::ostream& out, double multiplier) const;
void printPercent(std::ostream& out) const;
void print(std::ostream& out) const;
private:
- std::vector<int64> m_data;
+ std::vector<uint64_t> m_data;
int64 m_max; // the maximum value seen so far
- int64 m_count; // the number of elements added
+ uint64_t m_count; // the number of elements added
int m_binsize; // the size of each bucket
- int m_bins; // the number of buckets
- int m_largest_bin; // the largest bin used
+ uint32_t m_largest_bin; // the largest bin used
int64 m_sumSamples; // the sum of all samples
- int64 m_sumSquaredSamples; // the sum of the square of all samples
+ uint64_t m_sumSquaredSamples; // the sum of the square of all samples
double getStandardDeviation() const;
};
diff --git a/src/mem/ruby/network/Network.hh b/src/mem/ruby/network/Network.hh
index a59caebbd..9784af759 100644
--- a/src/mem/ruby/network/Network.hh
+++ b/src/mem/ruby/network/Network.hh
@@ -65,7 +65,7 @@ class Network : public ClockedObject
virtual void init();
- static int getNumberOfVirtualNetworks() { return m_virtual_networks; }
+ static uint32_t getNumberOfVirtualNetworks() { return m_virtual_networks; }
static uint32_t MessageSizeType_to_int(MessageSizeType size_type);
// returns the queue requested for the given component
diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc
index 546934d52..165561fe8 100644
--- a/src/mem/ruby/profiler/Profiler.cc
+++ b/src/mem/ruby/profiler/Profiler.cc
@@ -224,6 +224,38 @@ Profiler::printRequestProfile(ostream &out)
}
void
+Profiler::printDelayProfile(ostream &out)
+{
+ out << "Message Delayed Cycles" << endl;
+ out << "----------------------" << endl;
+
+ uint32_t numVNets = Network::getNumberOfVirtualNetworks();
+ Histogram delayHistogram;
+ std::vector<Histogram> delayVCHistogram(numVNets);
+
+ for (uint32_t i = 0; i < MachineType_NUM; i++) {
+ for (map<uint32_t, AbstractController*>::iterator it =
+ g_abs_controls[i].begin();
+ it != g_abs_controls[i].end(); ++it) {
+
+ AbstractController *ctr = (*it).second;
+ delayHistogram.add(ctr->getDelayHist());
+
+ for (uint32_t i = 0; i < numVNets; i++) {
+ delayVCHistogram[i].add(ctr->getDelayVCHist(i));
+ }
+ }
+ }
+
+ out << "Total_delay_cycles: " << delayHistogram << endl;
+
+ for (int i = 0; i < numVNets; i++) {
+ out << " virtual_network_" << i << "_delay_cycles: "
+ << delayVCHistogram[i] << endl;
+ }
+}
+
+void
Profiler::printStats(ostream& out, bool short_stats)
{
out << endl;
@@ -435,16 +467,7 @@ Profiler::printStats(ostream& out, bool short_stats)
}
out << endl;
- out << "Message Delayed Cycles" << endl;
- out << "----------------------" << endl;
- out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl;
- out << "Total_nonPF_delay_cycles: "
- << m_delayedCyclesNonPFHistogram << endl;
- for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
- out << " virtual_network_" << i << "_delay_cycles: "
- << m_delayedCyclesVCHistograms[i] << endl;
- }
-
+ printDelayProfile(out);
printResourceUsage(out);
}
}
@@ -488,14 +511,6 @@ Profiler::clearStats()
m_busyBankCount = 0;
- m_delayedCyclesHistogram.clear();
- m_delayedCyclesNonPFHistogram.clear();
- int size = Network::getNumberOfVirtualNetworks();
- m_delayedCyclesVCHistograms.resize(size);
- for (int i = 0; i < size; i++) {
- m_delayedCyclesVCHistograms[i].clear();
- }
-
m_missLatencyHistograms.resize(RubyRequestType_NUM);
for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
m_missLatencyHistograms[i].clear(200);
@@ -594,17 +609,6 @@ Profiler::profileSharing(const Address& addr, AccessType type,
}
void
-Profiler::profileMsgDelay(uint32_t virtualNetwork, Time delayCycles)
-{
- assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
- m_delayedCyclesHistogram.add(delayCycles);
- m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
- if (virtualNetwork != 0) {
- m_delayedCyclesNonPFHistogram.add(delayCycles);
- }
-}
-
-void
Profiler::profilePFWait(Time waitTime)
{
m_prefetchWaitHistogram.add(waitTime);
diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh
index 5b370de54..ecd57c035 100644
--- a/src/mem/ruby/profiler/Profiler.hh
+++ b/src/mem/ruby/profiler/Profiler.hh
@@ -152,8 +152,6 @@ class Profiler : public SimObject
void sequencerRequests(int num) { m_sequencer_requests.add(num); }
- void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles);
-
void print(std::ostream& out) const;
void rubyWatch(int proc);
@@ -172,6 +170,7 @@ class Profiler : public SimObject
private:
void printRequestProfile(std::ostream &out);
+ void printDelayProfile(std::ostream &out);
private:
// Private copy constructor and assignment operator
@@ -226,10 +225,6 @@ class Profiler : public SimObject
std::vector<Histogram> m_SWPrefetchLatencyHistograms;
std::vector<Histogram> m_SWPrefetchMachLatencyHistograms;
- Histogram m_delayedCyclesHistogram;
- Histogram m_delayedCyclesNonPFHistogram;
- std::vector<Histogram> m_delayedCyclesVCHistograms;
-
Histogram m_outstanding_requests;
Histogram m_outstanding_persistent_requests;
diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc
index adf411f82..bcd09796a 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.cc
+++ b/src/mem/ruby/slicc_interface/AbstractController.cc
@@ -52,6 +52,14 @@ AbstractController::clearStats()
{
m_requestProfileMap.clear();
m_request_count = 0;
+
+ m_delayHistogram.clear();
+
+ uint32_t size = Network::getNumberOfVirtualNetworks();
+ m_delayVCHistogram.resize(size);
+ for (uint32_t i = 0; i < size; i++) {
+ m_delayVCHistogram[i].clear();
+ }
}
void
@@ -63,3 +71,11 @@ AbstractController::profileRequest(const std::string &request)
// default value which is 0
m_requestProfileMap[request]++;
}
+
+void
+AbstractController::profileMsgDelay(uint32_t virtualNetwork, Time delay)
+{
+ assert(virtualNetwork < m_delayVCHistogram.size());
+ m_delayHistogram.add(delay);
+ m_delayVCHistogram[virtualNetwork].add(delay);
+}
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index 0e3af44a1..c452da723 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -36,6 +36,7 @@
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Consumer.hh"
#include "mem/ruby/common/DataBlock.hh"
+#include "mem/ruby/common/Histogram.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/recorder/CacheRecorder.hh"
#include "mem/ruby/system/MachineID.hh"
@@ -92,9 +93,15 @@ class AbstractController : public ClockedObject, public Consumer
const std::map<std::string, uint64_t>& getRequestProfileMap() const
{ return m_requestProfileMap; }
+ Histogram& getDelayHist() { return m_delayHistogram; }
+ Histogram& getDelayVCHist(uint32_t index)
+ { return m_delayVCHistogram[index]; }
+
protected:
//! Profiles original cache requests including PUTs
void profileRequest(const std::string &request);
+ //! Profiles the delay associated with messages.
+ void profileMsgDelay(uint32_t virtualNetwork, Time delay);
protected:
int m_transitions_per_cycle;
@@ -121,6 +128,11 @@ class AbstractController : public ClockedObject, public Consumer
//! call requisite function for updating the count.
std::map<std::string, uint64_t> m_requestProfileMap;
uint64_t m_request_count;
+
+ //! Histogram for profiling delay for the messages this controller
+ //! cares for
+ Histogram m_delayHistogram;
+ std::vector<Histogram> m_delayVCHistogram;
};
#endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc
index b8503c2cb..a8d8198ca 100644
--- a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc
+++ b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc
@@ -56,12 +56,6 @@ profile_sharing(const Address& addr, AccessType type, NodeID requestor,
}
void
-profileMsgDelay(uint32_t virtualNetwork, Time delayCycles)
-{
- g_system_ptr->getProfiler()->profileMsgDelay(virtualNetwork, delayCycles);
-}
-
-void
profileGetX(const Address& datablock, const Address& PC, const Set& owner,
const Set& sharers, NodeID requestor)
{
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh
index 1796d9442..bfc0afd56 100644
--- a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh
+++ b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh
@@ -51,7 +51,6 @@ void profile_token_retry(const Address& addr, AccessType type, int count);
void profile_filter_action(int action);
void profile_persistent_prediction(const Address& addr, AccessType type);
void profile_average_latency_estimate(int latency);
-void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles);
void profile_multicast_retry(const Address& addr, int count);
void profileGetX(const Address& datablock, const Address& PC, const Set& owner,