diff options
Diffstat (limited to 'src/mem')
-rw-r--r-- | src/mem/ruby/common/Histogram.cc | 97 | ||||
-rw-r--r-- | src/mem/ruby/common/Histogram.hh | 30 | ||||
-rw-r--r-- | src/mem/ruby/network/Network.hh | 2 | ||||
-rw-r--r-- | src/mem/ruby/profiler/Profiler.cc | 62 | ||||
-rw-r--r-- | src/mem/ruby/profiler/Profiler.hh | 7 | ||||
-rw-r--r-- | src/mem/ruby/slicc_interface/AbstractController.cc | 16 | ||||
-rw-r--r-- | src/mem/ruby/slicc_interface/AbstractController.hh | 12 | ||||
-rw-r--r-- | src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc | 6 | ||||
-rw-r--r-- | src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh | 1 |
9 files changed, 147 insertions, 86 deletions
diff --git a/src/mem/ruby/common/Histogram.cc b/src/mem/ruby/common/Histogram.cc index dcb723f1b..0558e5198 100644 --- a/src/mem/ruby/common/Histogram.cc +++ b/src/mem/ruby/common/Histogram.cc @@ -34,11 +34,10 @@ using namespace std; -Histogram::Histogram(int binsize, int bins) +Histogram::Histogram(int binsize, uint32_t bins) { m_binsize = binsize; - m_bins = bins; - clear(); + clear(bins); } Histogram::~Histogram() @@ -46,29 +45,43 @@ Histogram::~Histogram() } void -Histogram::clear(int binsize, int bins) +Histogram::clear(int binsize, uint32_t bins) { m_binsize = binsize; clear(bins); } void -Histogram::clear(int bins) +Histogram::clear(uint32_t bins) { - m_bins = bins; m_largest_bin = 0; m_max = 0; - m_data.resize(m_bins); - for (int i = 0; i < m_bins; i++) { + m_data.resize(bins); + for (uint32_t i = 0; i < bins; i++) { m_data[i] = 0; } + m_count = 0; m_max = 0; - m_sumSamples = 0; m_sumSquaredSamples = 0; } +void +Histogram::doubleBinSize() +{ + assert(m_binsize != -1); + uint32_t t_bins = m_data.size(); + + for (uint32_t i = 0; i < t_bins/2; i++) { + m_data[i] = m_data[i*2] + m_data[i*2 + 1]; + } + for (uint32_t i = t_bins/2; i < t_bins; i++) { + m_data[i] = 0; + } + + m_binsize *= 2; +} void Histogram::add(int64 value) @@ -80,7 +93,8 @@ Histogram::add(int64 value) m_sumSamples += value; m_sumSquaredSamples += (value*value); - int index; + uint32_t index; + if (m_binsize == -1) { // This is a log base 2 histogram if (value == 0) { @@ -93,37 +107,59 @@ Histogram::add(int64 value) } } else { // This is a linear histogram - while (m_max >= (m_bins * m_binsize)) { - for (int i = 0; i < m_bins/2; i++) { - m_data[i] = m_data[i*2] + m_data[i*2 + 1]; - } - for (int i = m_bins/2; i < m_bins; i++) { - m_data[i] = 0; - } - m_binsize *= 2; - } + uint32_t t_bins = m_data.size(); + + while (m_max >= (t_bins * m_binsize)) doubleBinSize(); index = value/m_binsize; } - assert(index >= 0); + + assert(index < m_data.size()); m_data[index]++; m_largest_bin = max(m_largest_bin, index); } void -Histogram::add(const Histogram& hist) +Histogram::add(Histogram& hist) { - assert(hist.getBins() == m_bins); - assert(hist.getBinSize() == -1); // assume log histogram - assert(m_binsize == -1); + uint32_t t_bins = m_data.size(); - for (int j = 0; j < hist.getData(0); j++) { - add(0); + if (hist.getBins() != t_bins) { + fatal("Histograms with different number of bins cannot be combined!"); } - for (int i = 1; i < m_bins; i++) { - for (int j = 0; j < hist.getData(i); j++) { - add(1<<(i-1)); // account for the + 1 index + m_max = max(m_max, hist.getMax()); + m_count += hist.size(); + m_sumSamples += hist.getTotal(); + m_sumSquaredSamples += hist.getSquaredTotal(); + + // Both histograms are log base 2. + if (hist.getBinSize() == -1 && m_binsize == -1) { + for (int j = 0; j < hist.getData(0); j++) { + add(0); + } + + for (uint32_t i = 1; i < t_bins; i++) { + for (int j = 0; j < hist.getData(i); j++) { + add(1<<(i-1)); // account for the + 1 index + } + } + } else if (hist.getBinSize() >= 1 && m_binsize >= 1) { + // Both the histogram are linear. + // We are assuming that the two histograms have the same + // minimum value that they can store. + + while (m_binsize > hist.getBinSize()) hist.doubleBinSize(); + while (hist.getBinSize() > m_binsize) doubleBinSize(); + + assert(m_binsize == hist.getBinSize()); + + for (uint32_t i = 0; i < t_bins; i++) { + m_data[i] += hist.getData(i); + + if (m_data[i] > 0) m_largest_bin = i; } + } else { + fatal("Don't know how to combine log and linear histograms!"); } } @@ -177,7 +213,8 @@ Histogram::printWithMultiplier(ostream& out, double multiplier) const << " | "; out << "standard deviation: " << getStandardDeviation() << " |"; } - for (int i = 0; i < m_bins && i <= m_largest_bin; i++) { + + for (uint32_t i = 0; i <= m_largest_bin; i++) { if (multiplier == 1.0) { out << " " << m_data[i]; } else { diff --git a/src/mem/ruby/common/Histogram.hh b/src/mem/ruby/common/Histogram.hh index bfc0e4293..c34e39af1 100644 --- a/src/mem/ruby/common/Histogram.hh +++ b/src/mem/ruby/common/Histogram.hh @@ -37,34 +37,38 @@ class Histogram { public: - Histogram(int binsize = 1, int bins = 50); + Histogram(int binsize = 1, uint32_t bins = 50); ~Histogram(); void add(int64 value); - void add(const Histogram& hist); - void clear() { clear(m_bins); } - void clear(int bins); - void clear(int binsize, int bins); - int64 size() const { return m_count; } - int getBins() const { return m_bins; } + void add(Histogram& hist); + void doubleBinSize(); + + void clear() { clear(m_data.size()); } + void clear(uint32_t bins); + void clear(int binsize, uint32_t bins); + + uint64_t size() const { return m_count; } + uint32_t getBins() const { return m_data.size(); } int getBinSize() const { return m_binsize; } int64 getTotal() const { return m_sumSamples; } - int64 getData(int index) const { return m_data[index]; } + uint64_t getSquaredTotal() const { return m_sumSquaredSamples; } + uint64_t getData(int index) const { return m_data[index]; } + int64 getMax() const { return m_max; } void printWithMultiplier(std::ostream& out, double multiplier) const; void printPercent(std::ostream& out) const; void print(std::ostream& out) const; private: - std::vector<int64> m_data; + std::vector<uint64_t> m_data; int64 m_max; // the maximum value seen so far - int64 m_count; // the number of elements added + uint64_t m_count; // the number of elements added int m_binsize; // the size of each bucket - int m_bins; // the number of buckets - int m_largest_bin; // the largest bin used + uint32_t m_largest_bin; // the largest bin used int64 m_sumSamples; // the sum of all samples - int64 m_sumSquaredSamples; // the sum of the square of all samples + uint64_t m_sumSquaredSamples; // the sum of the square of all samples double getStandardDeviation() const; }; diff --git a/src/mem/ruby/network/Network.hh b/src/mem/ruby/network/Network.hh index a59caebbd..9784af759 100644 --- a/src/mem/ruby/network/Network.hh +++ b/src/mem/ruby/network/Network.hh @@ -65,7 +65,7 @@ class Network : public ClockedObject virtual void init(); - static int getNumberOfVirtualNetworks() { return m_virtual_networks; } + static uint32_t getNumberOfVirtualNetworks() { return m_virtual_networks; } static uint32_t MessageSizeType_to_int(MessageSizeType size_type); // returns the queue requested for the given component diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index 546934d52..165561fe8 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -224,6 +224,38 @@ Profiler::printRequestProfile(ostream &out) } void +Profiler::printDelayProfile(ostream &out) +{ + out << "Message Delayed Cycles" << endl; + out << "----------------------" << endl; + + uint32_t numVNets = Network::getNumberOfVirtualNetworks(); + Histogram delayHistogram; + std::vector<Histogram> delayVCHistogram(numVNets); + + for (uint32_t i = 0; i < MachineType_NUM; i++) { + for (map<uint32_t, AbstractController*>::iterator it = + g_abs_controls[i].begin(); + it != g_abs_controls[i].end(); ++it) { + + AbstractController *ctr = (*it).second; + delayHistogram.add(ctr->getDelayHist()); + + for (uint32_t i = 0; i < numVNets; i++) { + delayVCHistogram[i].add(ctr->getDelayVCHist(i)); + } + } + } + + out << "Total_delay_cycles: " << delayHistogram << endl; + + for (int i = 0; i < numVNets; i++) { + out << " virtual_network_" << i << "_delay_cycles: " + << delayVCHistogram[i] << endl; + } +} + +void Profiler::printStats(ostream& out, bool short_stats) { out << endl; @@ -435,16 +467,7 @@ Profiler::printStats(ostream& out, bool short_stats) } out << endl; - out << "Message Delayed Cycles" << endl; - out << "----------------------" << endl; - out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; - out << "Total_nonPF_delay_cycles: " - << m_delayedCyclesNonPFHistogram << endl; - for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { - out << " virtual_network_" << i << "_delay_cycles: " - << m_delayedCyclesVCHistograms[i] << endl; - } - + printDelayProfile(out); printResourceUsage(out); } } @@ -488,14 +511,6 @@ Profiler::clearStats() m_busyBankCount = 0; - m_delayedCyclesHistogram.clear(); - m_delayedCyclesNonPFHistogram.clear(); - int size = Network::getNumberOfVirtualNetworks(); - m_delayedCyclesVCHistograms.resize(size); - for (int i = 0; i < size; i++) { - m_delayedCyclesVCHistograms[i].clear(); - } - m_missLatencyHistograms.resize(RubyRequestType_NUM); for (int i = 0; i < m_missLatencyHistograms.size(); i++) { m_missLatencyHistograms[i].clear(200); @@ -594,17 +609,6 @@ Profiler::profileSharing(const Address& addr, AccessType type, } void -Profiler::profileMsgDelay(uint32_t virtualNetwork, Time delayCycles) -{ - assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); - m_delayedCyclesHistogram.add(delayCycles); - m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); - if (virtualNetwork != 0) { - m_delayedCyclesNonPFHistogram.add(delayCycles); - } -} - -void Profiler::profilePFWait(Time waitTime) { m_prefetchWaitHistogram.add(waitTime); diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index 5b370de54..ecd57c035 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -152,8 +152,6 @@ class Profiler : public SimObject void sequencerRequests(int num) { m_sequencer_requests.add(num); } - void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles); - void print(std::ostream& out) const; void rubyWatch(int proc); @@ -172,6 +170,7 @@ class Profiler : public SimObject private: void printRequestProfile(std::ostream &out); + void printDelayProfile(std::ostream &out); private: // Private copy constructor and assignment operator @@ -226,10 +225,6 @@ class Profiler : public SimObject std::vector<Histogram> m_SWPrefetchLatencyHistograms; std::vector<Histogram> m_SWPrefetchMachLatencyHistograms; - Histogram m_delayedCyclesHistogram; - Histogram m_delayedCyclesNonPFHistogram; - std::vector<Histogram> m_delayedCyclesVCHistograms; - Histogram m_outstanding_requests; Histogram m_outstanding_persistent_requests; diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index adf411f82..bcd09796a 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -52,6 +52,14 @@ AbstractController::clearStats() { m_requestProfileMap.clear(); m_request_count = 0; + + m_delayHistogram.clear(); + + uint32_t size = Network::getNumberOfVirtualNetworks(); + m_delayVCHistogram.resize(size); + for (uint32_t i = 0; i < size; i++) { + m_delayVCHistogram[i].clear(); + } } void @@ -63,3 +71,11 @@ AbstractController::profileRequest(const std::string &request) // default value which is 0 m_requestProfileMap[request]++; } + +void +AbstractController::profileMsgDelay(uint32_t virtualNetwork, Time delay) +{ + assert(virtualNetwork < m_delayVCHistogram.size()); + m_delayHistogram.add(delay); + m_delayVCHistogram[virtualNetwork].add(delay); +} diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index 0e3af44a1..c452da723 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -36,6 +36,7 @@ #include "mem/ruby/common/Address.hh" #include "mem/ruby/common/Consumer.hh" #include "mem/ruby/common/DataBlock.hh" +#include "mem/ruby/common/Histogram.hh" #include "mem/ruby/network/Network.hh" #include "mem/ruby/recorder/CacheRecorder.hh" #include "mem/ruby/system/MachineID.hh" @@ -92,9 +93,15 @@ class AbstractController : public ClockedObject, public Consumer const std::map<std::string, uint64_t>& getRequestProfileMap() const { return m_requestProfileMap; } + Histogram& getDelayHist() { return m_delayHistogram; } + Histogram& getDelayVCHist(uint32_t index) + { return m_delayVCHistogram[index]; } + protected: //! Profiles original cache requests including PUTs void profileRequest(const std::string &request); + //! Profiles the delay associated with messages. + void profileMsgDelay(uint32_t virtualNetwork, Time delay); protected: int m_transitions_per_cycle; @@ -121,6 +128,11 @@ class AbstractController : public ClockedObject, public Consumer //! call requisite function for updating the count. std::map<std::string, uint64_t> m_requestProfileMap; uint64_t m_request_count; + + //! Histogram for profiling delay for the messages this controller + //! cares for + Histogram m_delayHistogram; + std::vector<Histogram> m_delayVCHistogram; }; #endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__ diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc index b8503c2cb..a8d8198ca 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc +++ b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc @@ -56,12 +56,6 @@ profile_sharing(const Address& addr, AccessType type, NodeID requestor, } void -profileMsgDelay(uint32_t virtualNetwork, Time delayCycles) -{ - g_system_ptr->getProfiler()->profileMsgDelay(virtualNetwork, delayCycles); -} - -void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) { diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh index 1796d9442..bfc0afd56 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh @@ -51,7 +51,6 @@ void profile_token_retry(const Address& addr, AccessType type, int count); void profile_filter_action(int action); void profile_persistent_prediction(const Address& addr, AccessType type); void profile_average_latency_estimate(int latency); -void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles); void profile_multicast_retry(const Address& addr, int count); void profileGetX(const Address& datablock, const Address& PC, const Set& owner, |