diff options
author | Nathan Binkert <nate@binkert.org> | 2010-03-23 22:49:43 -0700 |
---|---|---|
committer | Nathan Binkert <nate@binkert.org> | 2010-03-23 22:49:43 -0700 |
commit | a2652a048aaba65302d5d191754d873fc94fb226 (patch) | |
tree | 1009119d03ff469782ed6a180cbee6ca6bddd2be /src/mem/ruby/profiler | |
parent | d2eb58967562d04044a5977787a312e9b259b9b7 (diff) | |
download | gem5-a2652a048aaba65302d5d191754d873fc94fb226.tar.xz |
ruby: continue style pass
Diffstat (limited to 'src/mem/ruby/profiler')
-rw-r--r-- | src/mem/ruby/profiler/AccessTraceForAddress.cc | 135 | ||||
-rw-r--r-- | src/mem/ruby/profiler/AccessTraceForAddress.hh | 110 | ||||
-rw-r--r-- | src/mem/ruby/profiler/AddressProfiler.cc | 474 | ||||
-rw-r--r-- | src/mem/ruby/profiler/AddressProfiler.hh | 135 | ||||
-rw-r--r-- | src/mem/ruby/profiler/CacheProfiler.cc | 169 | ||||
-rw-r--r-- | src/mem/ruby/profiler/CacheProfiler.hh | 90 | ||||
-rw-r--r-- | src/mem/ruby/profiler/MemCntrlProfiler.cc | 199 | ||||
-rw-r--r-- | src/mem/ruby/profiler/MemCntrlProfiler.hh | 141 | ||||
-rw-r--r-- | src/mem/ruby/profiler/Profiler.cc | 941 | ||||
-rw-r--r-- | src/mem/ruby/profiler/Profiler.hh | 261 | ||||
-rw-r--r-- | src/mem/ruby/profiler/StoreTrace.cc | 187 | ||||
-rw-r--r-- | src/mem/ruby/profiler/StoreTrace.hh | 120 |
12 files changed, 1480 insertions, 1482 deletions
diff --git a/src/mem/ruby/profiler/AccessTraceForAddress.cc b/src/mem/ruby/profiler/AccessTraceForAddress.cc index 48b28b735..978b72982 100644 --- a/src/mem/ruby/profiler/AccessTraceForAddress.cc +++ b/src/mem/ruby/profiler/AccessTraceForAddress.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,100 +26,96 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * $Id$ - * - */ - -#include "mem/ruby/profiler/AccessTraceForAddress.hh" #include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/profiler/AccessTraceForAddress.hh" AccessTraceForAddress::AccessTraceForAddress() { - m_histogram_ptr = NULL; + m_histogram_ptr = NULL; } AccessTraceForAddress::AccessTraceForAddress(const Address& addr) { - m_addr = addr; - m_total = 0; - m_loads = 0; - m_stores = 0; - m_atomics = 0; - m_user = 0; - m_sharing = 0; - m_histogram_ptr = NULL; + m_addr = addr; + m_total = 0; + m_loads = 0; + m_stores = 0; + m_atomics = 0; + m_user = 0; + m_sharing = 0; + m_histogram_ptr = NULL; } AccessTraceForAddress::~AccessTraceForAddress() { - if (m_histogram_ptr != NULL) { - delete m_histogram_ptr; - m_histogram_ptr = NULL; - } + if (m_histogram_ptr != NULL) { + delete m_histogram_ptr; + m_histogram_ptr = NULL; + } } -void AccessTraceForAddress::print(ostream& out) const +void +AccessTraceForAddress::print(ostream& out) const { - out << m_addr; + out << m_addr; - if (m_histogram_ptr == NULL) { - out << " " << m_total; - out << " | " << m_loads; - out << " " << m_stores; - out << " " << m_atomics; - out << " | " << m_user; - out << " " << m_total-m_user; - out << " | " << m_sharing; - out << " | " << m_touched_by.count(); - } else { - assert(m_total == 0); - out << " " << (*m_histogram_ptr); - } + if (m_histogram_ptr == NULL) { + out << " " << m_total; + out << " | " << m_loads; + out << " " << m_stores; + out << " " << m_atomics; + out << " | " << m_user; + out << " " << m_total-m_user; + out << " | " << m_sharing; + out << " | " << m_touched_by.count(); + } else { + assert(m_total == 0); + out << " " << (*m_histogram_ptr); + } } -void AccessTraceForAddress::update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss) +void +AccessTraceForAddress::update(CacheRequestType type, + AccessModeType access_mode, NodeID cpu, + bool sharing_miss) { - m_touched_by.add(cpu); - m_total++; - if(type == CacheRequestType_ATOMIC) { - m_atomics++; - } else if(type == CacheRequestType_LD){ - m_loads++; - } else if (type == CacheRequestType_ST){ - m_stores++; - } else { - // ERROR_MSG("Trying to add invalid access to trace"); - } + m_touched_by.add(cpu); + m_total++; + if(type == CacheRequestType_ATOMIC) { + m_atomics++; + } else if(type == CacheRequestType_LD){ + m_loads++; + } else if (type == CacheRequestType_ST){ + m_stores++; + } else { + // ERROR_MSG("Trying to add invalid access to trace"); + } - if (access_mode == AccessModeType_UserMode) { - m_user++; - } + if (access_mode == AccessModeType_UserMode) { + m_user++; + } - if (sharing_miss) { - m_sharing++; - } + if (sharing_miss) { + m_sharing++; + } } -int AccessTraceForAddress::getTotal() const +int +AccessTraceForAddress::getTotal() const { - if (m_histogram_ptr == NULL) { - return m_total; - } else { - return m_histogram_ptr->getTotal(); - } + if (m_histogram_ptr == NULL) { + return m_total; + } else { + return m_histogram_ptr->getTotal(); + } } -void AccessTraceForAddress::addSample(int value) +void +AccessTraceForAddress::addSample(int value) { - assert(m_total == 0); - if (m_histogram_ptr == NULL) { - m_histogram_ptr = new Histogram; - } - m_histogram_ptr->add(value); -} - -bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2) -{ - return (n1->getTotal() > n2->getTotal()); + assert(m_total == 0); + if (m_histogram_ptr == NULL) { + m_histogram_ptr = new Histogram; + } + m_histogram_ptr->add(value); } diff --git a/src/mem/ruby/profiler/AccessTraceForAddress.hh b/src/mem/ruby/profiler/AccessTraceForAddress.hh index 2761d6de8..53b96856e 100644 --- a/src/mem/ruby/profiler/AccessTraceForAddress.hh +++ b/src/mem/ruby/profiler/AccessTraceForAddress.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,77 +26,60 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * $Id$ - * - * Description: - * - */ - -#ifndef ACCESSTRACEFORADDRESS_H -#define ACCESSTRACEFORADDRESS_H +#ifndef __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ +#define __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/common/Address.hh" -#include "mem/protocol/CacheRequestType.hh" #include "mem/protocol/AccessModeType.hh" -#include "mem/ruby/system/NodeID.hh" +#include "mem/protocol/CacheRequestType.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Global.hh" #include "mem/ruby/common/Set.hh" -class Histogram; - -class AccessTraceForAddress { -public: - // Constructors - AccessTraceForAddress(); - explicit AccessTraceForAddress(const Address& addr); - - // Destructor - ~AccessTraceForAddress(); - - // Public Methods - - void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss); - int getTotal() const; - int getSharing() const { return m_sharing; } - int getTouchedBy() const { return m_touched_by.count(); } - const Address& getAddress() const { return m_addr; } - void addSample(int value); - - void print(ostream& out) const; -private: - // Private Methods - - // Private copy constructor and assignment operator - // AccessTraceForAddress(const AccessTraceForAddress& obj); - // AccessTraceForAddress& operator=(const AccessTraceForAddress& obj); +#include "mem/ruby/system/NodeID.hh" - // Data Members (m_ prefix) +class Histogram; - Address m_addr; - uint64 m_loads; - uint64 m_stores; - uint64 m_atomics; - uint64 m_total; - uint64 m_user; - uint64 m_sharing; - Set m_touched_by; - Histogram* m_histogram_ptr; +class AccessTraceForAddress +{ + public: + AccessTraceForAddress(); + explicit AccessTraceForAddress(const Address& addr); + ~AccessTraceForAddress(); + + void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, + bool sharing_miss); + int getTotal() const; + int getSharing() const { return m_sharing; } + int getTouchedBy() const { return m_touched_by.count(); } + const Address& getAddress() const { return m_addr; } + void addSample(int value); + + void print(ostream& out) const; + + private: + Address m_addr; + uint64 m_loads; + uint64 m_stores; + uint64 m_atomics; + uint64 m_total; + uint64 m_user; + uint64 m_sharing; + Set m_touched_by; + Histogram* m_histogram_ptr; }; -bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2); - -// Output operator declaration -ostream& operator<<(ostream& out, const AccessTraceForAddress& obj); - -// ******************* Definitions ******************* +inline bool +node_less_then_eq(const AccessTraceForAddress* n1, + const AccessTraceForAddress* n2) +{ + return n1->getTotal() > n2->getTotal(); +} -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const AccessTraceForAddress& obj) +inline ostream& +operator<<(ostream& out, const AccessTraceForAddress& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + out << flush; + return out; } -#endif //ACCESSTRACEFORADDRESS_H +#endif // __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ diff --git a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc index c613431ca..2d7d655c0 100644 --- a/src/mem/ruby/profiler/AddressProfiler.cc +++ b/src/mem/ruby/profiler/AddressProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,272 +26,293 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * AddressProfiler.cc - * - * Description: See AddressProfiler.hh - * - * $Id$ - * - */ - -#include "mem/ruby/profiler/AddressProfiler.hh" +#include "mem/gems_common/Map.hh" +#include "mem/gems_common/PrioHeap.hh" #include "mem/protocol/CacheMsg.hh" #include "mem/ruby/profiler/AccessTraceForAddress.hh" -#include "mem/gems_common/PrioHeap.hh" -#include "mem/gems_common/Map.hh" -#include "mem/ruby/system/System.hh" +#include "mem/ruby/profiler/AddressProfiler.hh" #include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" + +typedef AddressProfiler::AddressMap AddressMap; // Helper functions -static AccessTraceForAddress& lookupTraceForAddress(const Address& addr, - Map<Address, - AccessTraceForAddress>* record_map); +AccessTraceForAddress& +lookupTraceForAddress(const Address& addr, AddressMap* record_map) +{ + if (!record_map->exist(addr)) { + record_map->add(addr, AccessTraceForAddress(addr)); + } + return record_map->lookup(addr); +} -static void printSorted(ostream& out, - int num_of_sequencers, - const Map<Address, AccessTraceForAddress>* record_map, - string description); +void +printSorted(ostream& out, int num_of_sequencers, const AddressMap* record_map, + string description) +{ + const int records_printed = 100; + + uint64 misses = 0; + PrioHeap<AccessTraceForAddress*> heap; + Vector<Address> keys = record_map->keys(); + for (int i = 0; i < keys.size(); i++) { + AccessTraceForAddress* record = &(record_map->lookup(keys[i])); + misses += record->getTotal(); + heap.insert(record); + } + + out << "Total_entries_" << description << ": " << keys.size() << endl; + if (g_system_ptr->getProfiler()->getAllInstructions()) + out << "Total_Instructions_" << description << ": " << misses << endl; + else + out << "Total_data_misses_" << description << ": " << misses << endl; + + out << "total | load store atomic | user supervisor | sharing | touched-by" + << endl; + + Histogram remaining_records(1, 100); + Histogram all_records(1, 100); + Histogram remaining_records_log(-1); + Histogram all_records_log(-1); + + // Allows us to track how many lines where touched by n processors + Vector<int64> m_touched_vec; + Vector<int64> m_touched_weighted_vec; + m_touched_vec.setSize(num_of_sequencers+1); + m_touched_weighted_vec.setSize(num_of_sequencers+1); + for (int i = 0; i < m_touched_vec.size(); i++) { + m_touched_vec[i] = 0; + m_touched_weighted_vec[i] = 0; + } + + int counter = 0; + while (heap.size() > 0 && counter < records_printed) { + AccessTraceForAddress* record = heap.extractMin(); + double percent = 100.0 * (record->getTotal() / double(misses)); + out << description << " | " << percent << " % " << *record << endl; + all_records.add(record->getTotal()); + all_records_log.add(record->getTotal()); + counter++; + m_touched_vec[record->getTouchedBy()]++; + m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); + } + + while (heap.size() > 0) { + AccessTraceForAddress* record = heap.extractMin(); + all_records.add(record->getTotal()); + remaining_records.add(record->getTotal()); + all_records_log.add(record->getTotal()); + remaining_records_log.add(record->getTotal()); + m_touched_vec[record->getTouchedBy()]++; + m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); + } + out << endl; + out << "all_records_" << description << ": " + << all_records << endl + << "all_records_log_" << description << ": " + << all_records_log << endl + << "remaining_records_" << description << ": " + << remaining_records << endl + << "remaining_records_log_" << description << ": " + << remaining_records_log << endl + << "touched_by_" << description << ": " + << m_touched_vec << endl + << "touched_by_weighted_" << description << ": " + << m_touched_weighted_vec << endl + << endl; +} AddressProfiler::AddressProfiler(int num_of_sequencers) { - m_dataAccessTrace = new Map<Address, AccessTraceForAddress>; - m_macroBlockAccessTrace = new Map<Address, AccessTraceForAddress>; - m_programCounterAccessTrace = new Map<Address, AccessTraceForAddress>; - m_retryProfileMap = new Map<Address, AccessTraceForAddress>; - m_num_of_sequencers = num_of_sequencers; - clearStats(); + m_dataAccessTrace = new AddressMap; + m_macroBlockAccessTrace = new AddressMap; + m_programCounterAccessTrace = new AddressMap; + m_retryProfileMap = new AddressMap; + m_num_of_sequencers = num_of_sequencers; + clearStats(); } AddressProfiler::~AddressProfiler() { - delete m_dataAccessTrace; - delete m_macroBlockAccessTrace; - delete m_programCounterAccessTrace; - delete m_retryProfileMap; + delete m_dataAccessTrace; + delete m_macroBlockAccessTrace; + delete m_programCounterAccessTrace; + delete m_retryProfileMap; } -void AddressProfiler::setHotLines(bool hot_lines){ - m_hot_lines = hot_lines; -} -void AddressProfiler::setAllInstructions(bool all_instructions){ - m_all_instructions = all_instructions; +void +AddressProfiler::setHotLines(bool hot_lines) +{ + m_hot_lines = hot_lines; } -void AddressProfiler::printStats(ostream& out) const +void +AddressProfiler::setAllInstructions(bool all_instructions) { - if (m_hot_lines) { - out << endl; - out << "AddressProfiler Stats" << endl; - out << "---------------------" << endl; - - out << endl; - out << "sharing_misses: " << m_sharing_miss_counter << endl; - out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl; - out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl; - - out << endl; - out << "Hot Data Blocks" << endl; - out << "---------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_dataAccessTrace, "block_address"); - - out << endl; - out << "Hot MacroData Blocks" << endl; - out << "--------------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace, "macroblock_address"); - - out << "Hot Instructions" << endl; - out << "----------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address"); - } - - if (m_all_instructions){ - out << endl; - out << "All Instructions Profile:" << endl; - out << "-------------------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address"); - out << endl; - } - - if (m_retryProfileHisto.size() > 0) { - out << "Retry Profile" << endl; - out << "-------------" << endl; - out << endl; - out << "retry_histogram_absolute: " << m_retryProfileHisto << endl; - out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl; - out << "retry_histogram_read: " << m_retryProfileHistoRead << endl; + m_all_instructions = all_instructions; +} - out << "retry_histogram_percent: "; - m_retryProfileHisto.printPercent(out); - out << endl; +void +AddressProfiler::printStats(ostream& out) const +{ + if (m_hot_lines) { + out << endl; + out << "AddressProfiler Stats" << endl; + out << "---------------------" << endl; + + out << endl; + out << "sharing_misses: " << m_sharing_miss_counter << endl; + out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl; + out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl; + + out << endl; + out << "Hot Data Blocks" << endl; + out << "---------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_dataAccessTrace, + "block_address"); + + out << endl; + out << "Hot MacroData Blocks" << endl; + out << "--------------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace, + "macroblock_address"); + + out << "Hot Instructions" << endl; + out << "----------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, + "pc_address"); + } - printSorted(out, m_num_of_sequencers, m_retryProfileMap, "block_address"); - out << endl; - } + if (m_all_instructions) { + out << endl; + out << "All Instructions Profile:" << endl; + out << "-------------------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, + "pc_address"); + out << endl; + } + if (m_retryProfileHisto.size() > 0) { + out << "Retry Profile" << endl; + out << "-------------" << endl; + out << endl; + out << "retry_histogram_absolute: " << m_retryProfileHisto << endl; + out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl; + out << "retry_histogram_read: " << m_retryProfileHistoRead << endl; + + out << "retry_histogram_percent: "; + m_retryProfileHisto.printPercent(out); + out << endl; + + printSorted(out, m_num_of_sequencers, m_retryProfileMap, + "block_address"); + out << endl; + } } -void AddressProfiler::clearStats() +void +AddressProfiler::clearStats() { - // Clear the maps - m_sharing_miss_counter = 0; - m_dataAccessTrace->clear(); - m_macroBlockAccessTrace->clear(); - m_programCounterAccessTrace->clear(); - m_retryProfileMap->clear(); - m_retryProfileHisto.clear(); - m_retryProfileHistoRead.clear(); - m_retryProfileHistoWrite.clear(); - m_getx_sharing_histogram.clear(); - m_gets_sharing_histogram.clear(); + // Clear the maps + m_sharing_miss_counter = 0; + m_dataAccessTrace->clear(); + m_macroBlockAccessTrace->clear(); + m_programCounterAccessTrace->clear(); + m_retryProfileMap->clear(); + m_retryProfileHisto.clear(); + m_retryProfileHistoRead.clear(); + m_retryProfileHistoWrite.clear(); + m_getx_sharing_histogram.clear(); + m_gets_sharing_histogram.clear(); } -void AddressProfiler::profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) +void +AddressProfiler::profileGetX(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, + NodeID requestor) { - Set indirection_set; - indirection_set.addSet(sharers); - indirection_set.addSet(owner); - indirection_set.remove(requestor); - int num_indirections = indirection_set.count(); + Set indirection_set; + indirection_set.addSet(sharers); + indirection_set.addSet(owner); + indirection_set.remove(requestor); + int num_indirections = indirection_set.count(); - m_getx_sharing_histogram.add(num_indirections); - bool indirection_miss = (num_indirections > 0); + m_getx_sharing_histogram.add(num_indirections); + bool indirection_miss = (num_indirections > 0); - addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0), requestor, indirection_miss); + addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0), + requestor, indirection_miss); } -void AddressProfiler::profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) +void +AddressProfiler::profileGetS(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, + NodeID requestor) { - Set indirection_set; - indirection_set.addSet(owner); - indirection_set.remove(requestor); - int num_indirections = indirection_set.count(); + Set indirection_set; + indirection_set.addSet(owner); + indirection_set.remove(requestor); + int num_indirections = indirection_set.count(); - m_gets_sharing_histogram.add(num_indirections); - bool indirection_miss = (num_indirections > 0); + m_gets_sharing_histogram.add(num_indirections); + bool indirection_miss = (num_indirections > 0); - addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0), requestor, indirection_miss); + addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0), + requestor, indirection_miss); } -void AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss) +void +AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, + CacheRequestType type, + AccessModeType access_mode, NodeID id, + bool sharing_miss) { - if (m_all_instructions) { - if (sharing_miss) { - m_sharing_miss_counter++; + if (m_all_instructions) { + if (sharing_miss) { + m_sharing_miss_counter++; + } + + // record data address trace info + data_addr.makeLineAddress(); + lookupTraceForAddress(data_addr, m_dataAccessTrace). + update(type, access_mode, id, sharing_miss); + + // record macro data address trace info + + // 6 for datablock, 4 to make it 16x more coarse + Address macro_addr(data_addr.maskLowOrderBits(10)); + lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace). + update(type, access_mode, id, sharing_miss); + + // record program counter address trace info + lookupTraceForAddress(pc_addr, m_programCounterAccessTrace). + update(type, access_mode, id, sharing_miss); } - // record data address trace info - data_addr.makeLineAddress(); - lookupTraceForAddress(data_addr, m_dataAccessTrace).update(type, access_mode, id, sharing_miss); - - // record macro data address trace info - Address macro_addr(data_addr.maskLowOrderBits(10)); // 6 for datablock, 4 to make it 16x more coarse - lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace).update(type, access_mode, id, sharing_miss); - - // record program counter address trace info - lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss); - } - - if (m_all_instructions) { - // This code is used if the address profiler is an all-instructions profiler - // record program counter address trace info - lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss); - } -} - -void AddressProfiler::profileRetry(const Address& data_addr, AccessType type, int count) -{ - m_retryProfileHisto.add(count); - if (type == AccessType_Read) { - m_retryProfileHistoRead.add(count); - } else { - m_retryProfileHistoWrite.add(count); - } - if (count > 1) { - lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count); - } -} - -// ***** Normal Functions ****** - -static void printSorted(ostream& out, - int num_of_sequencers, - const Map<Address, AccessTraceForAddress>* record_map, - string description) -{ - const int records_printed = 100; - - uint64 misses = 0; - PrioHeap<AccessTraceForAddress*> heap; - Vector<Address> keys = record_map->keys(); - for(int i=0; i<keys.size(); i++){ - AccessTraceForAddress* record = &(record_map->lookup(keys[i])); - misses += record->getTotal(); - heap.insert(record); - } - - out << "Total_entries_" << description << ": " << keys.size() << endl; - if (g_system_ptr->getProfiler()->getAllInstructions()) - out << "Total_Instructions_" << description << ": " << misses << endl; - else - out << "Total_data_misses_" << description << ": " << misses << endl; - - out << "total | load store atomic | user supervisor | sharing | touched-by" << endl; - - Histogram remaining_records(1, 100); - Histogram all_records(1, 100); - Histogram remaining_records_log(-1); - Histogram all_records_log(-1); - - // Allows us to track how many lines where touched by n processors - Vector<int64> m_touched_vec; - Vector<int64> m_touched_weighted_vec; - m_touched_vec.setSize(num_of_sequencers+1); - m_touched_weighted_vec.setSize(num_of_sequencers+1); - for (int i=0; i<m_touched_vec.size(); i++) { - m_touched_vec[i] = 0; - m_touched_weighted_vec[i] = 0; - } - - int counter = 0; - while((heap.size() > 0) && (counter < records_printed)) { - AccessTraceForAddress* record = heap.extractMin(); - double percent = 100.0*(record->getTotal()/double(misses)); - out << description << " | " << percent << " % " << *record << endl; - all_records.add(record->getTotal()); - all_records_log.add(record->getTotal()); - counter++; - m_touched_vec[record->getTouchedBy()]++; - m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); - } - - while(heap.size() > 0) { - AccessTraceForAddress* record = heap.extractMin(); - all_records.add(record->getTotal()); - remaining_records.add(record->getTotal()); - all_records_log.add(record->getTotal()); - remaining_records_log.add(record->getTotal()); - m_touched_vec[record->getTouchedBy()]++; - m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); - } - out << endl; - out << "all_records_" << description << ": " << all_records << endl; - out << "all_records_log_" << description << ": " << all_records_log << endl; - out << "remaining_records_" << description << ": " << remaining_records << endl; - out << "remaining_records_log_" << description << ": " << remaining_records_log << endl; - out << "touched_by_" << description << ": " << m_touched_vec << endl; - out << "touched_by_weighted_" << description << ": " << m_touched_weighted_vec << endl; - out << endl; + if (m_all_instructions) { + // This code is used if the address profiler is an + // all-instructions profiler record program counter address + // trace info + lookupTraceForAddress(pc_addr, m_programCounterAccessTrace). + update(type, access_mode, id, sharing_miss); + } } -static AccessTraceForAddress& lookupTraceForAddress(const Address& addr, Map<Address, AccessTraceForAddress>* record_map) +void +AddressProfiler::profileRetry(const Address& data_addr, AccessType type, + int count) { - if(record_map->exist(addr) == false){ - record_map->add(addr, AccessTraceForAddress(addr)); - } - return record_map->lookup(addr); + m_retryProfileHisto.add(count); + if (type == AccessType_Read) { + m_retryProfileHistoRead.add(count); + } else { + m_retryProfileHistoWrite.add(count); + } + if (count > 1) { + lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count); + } } diff --git a/src/mem/ruby/profiler/AddressProfiler.hh b/src/mem/ruby/profiler/AddressProfiler.hh index 177aa56d6..76dac323f 100644 --- a/src/mem/ruby/profiler/AddressProfiler.hh +++ b/src/mem/ruby/profiler/AddressProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,89 +26,77 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * AddressProfiler.hh - * - * Description: - * - * $Id$ - * - */ - -#ifndef ADDRESSPROFILER_H -#define ADDRESSPROFILER_H +#ifndef __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ +#define __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ +#include "mem/protocol/AccessType.hh" +#include "mem/protocol/CacheMsg.hh" +#include "mem/ruby/common/Address.hh" #include "mem/ruby/common/Global.hh" -#include "mem/ruby/system/NodeID.hh" #include "mem/ruby/common/Histogram.hh" -#include "mem/ruby/common/Address.hh" -#include "mem/protocol/CacheMsg.hh" -#include "mem/protocol/AccessType.hh" +#include "mem/ruby/system/NodeID.hh" class AccessTraceForAddress; class Set; template <class KEY_TYPE, class VALUE_TYPE> class Map; -class AddressProfiler { -public: - // Constructors - AddressProfiler(int num_of_sequencers); - - // Destructor - ~AddressProfiler(); - - // Public Methods - void printStats(ostream& out) const; - void clearStats(); - - void addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss); - void profileRetry(const Address& data_addr, AccessType type, int count); - void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); - void profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); - - void print(ostream& out) const; - - //added by SS - void setHotLines(bool hot_lines); - void setAllInstructions(bool all_instructions); -private: - // Private Methods - - // Private copy constructor and assignment operator - AddressProfiler(const AddressProfiler& obj); - AddressProfiler& operator=(const AddressProfiler& obj); - - // Data Members (m_ prefix) - int64 m_sharing_miss_counter; - - Map<Address, AccessTraceForAddress>* m_dataAccessTrace; - Map<Address, AccessTraceForAddress>* m_macroBlockAccessTrace; - Map<Address, AccessTraceForAddress>* m_programCounterAccessTrace; - Map<Address, AccessTraceForAddress>* m_retryProfileMap; - Histogram m_retryProfileHisto; - Histogram m_retryProfileHistoWrite; - Histogram m_retryProfileHistoRead; - Histogram m_getx_sharing_histogram; - Histogram m_gets_sharing_histogram; -//added by SS - bool m_hot_lines; - bool m_all_instructions; - - int m_num_of_sequencers; +class AddressProfiler +{ + public: + typedef Map<Address, AccessTraceForAddress> AddressMap; + + public: + AddressProfiler(int num_of_sequencers); + ~AddressProfiler(); + + void printStats(ostream& out) const; + void clearStats(); + + void addTraceSample(Address data_addr, Address pc_addr, + CacheRequestType type, AccessModeType access_mode, + NodeID id, bool sharing_miss); + void profileRetry(const Address& data_addr, AccessType type, int count); + void profileGetX(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, NodeID requestor); + void profileGetS(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, NodeID requestor); + + void print(ostream& out) const; + + //added by SS + void setHotLines(bool hot_lines); + void setAllInstructions(bool all_instructions); + + private: + // Private copy constructor and assignment operator + AddressProfiler(const AddressProfiler& obj); + AddressProfiler& operator=(const AddressProfiler& obj); + + int64 m_sharing_miss_counter; + + AddressMap* m_dataAccessTrace; + AddressMap* m_macroBlockAccessTrace; + AddressMap* m_programCounterAccessTrace; + AddressMap* m_retryProfileMap; + Histogram m_retryProfileHisto; + Histogram m_retryProfileHistoWrite; + Histogram m_retryProfileHistoRead; + Histogram m_getx_sharing_histogram; + Histogram m_gets_sharing_histogram; + + //added by SS + bool m_hot_lines; + bool m_all_instructions; + + int m_num_of_sequencers; }; -// Output operator declaration -ostream& operator<<(ostream& out, const AddressProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const AddressProfiler& obj) +inline ostream& +operator<<(ostream& out, const AddressProfiler& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + out << flush; + return out; } -#endif //ADDRESSPROFILER_H +#endif // __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ diff --git a/src/mem/ruby/profiler/CacheProfiler.cc b/src/mem/ruby/profiler/CacheProfiler.cc index 50581fcf9..9d12a46ab 100644 --- a/src/mem/ruby/profiler/CacheProfiler.cc +++ b/src/mem/ruby/profiler/CacheProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,111 +26,113 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * CacheProfiler.C - * - * Description: See CacheProfiler.hh - * - * $Id$ - * - */ - -#include "mem/ruby/profiler/CacheProfiler.hh" -#include "mem/ruby/profiler/AccessTraceForAddress.hh" #include "mem/gems_common/PrioHeap.hh" -#include "mem/ruby/system/System.hh" -#include "mem/ruby/profiler/Profiler.hh" #include "mem/gems_common/Vector.hh" +#include "mem/ruby/profiler/AccessTraceForAddress.hh" +#include "mem/ruby/profiler/CacheProfiler.hh" +#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" CacheProfiler::CacheProfiler(const string& description) { - m_description = description; - m_requestTypeVec_ptr = new Vector<int>; - m_requestTypeVec_ptr->setSize(int(CacheRequestType_NUM)); + m_description = description; + m_requestTypeVec_ptr = new Vector<int>; + m_requestTypeVec_ptr->setSize(int(CacheRequestType_NUM)); - clearStats(); + clearStats(); } CacheProfiler::~CacheProfiler() { - delete m_requestTypeVec_ptr; + delete m_requestTypeVec_ptr; } -void CacheProfiler::printStats(ostream& out) const +void +CacheProfiler::printStats(ostream& out) const { - out << "Cache Stats: " << m_description << endl; - string description = " " + m_description; - - out << description << "_total_misses: " << m_misses << endl; - out << description << "_total_demand_misses: " << m_demand_misses << endl; - out << description << "_total_prefetches: " << m_prefetches << endl; - out << description << "_total_sw_prefetches: " << m_sw_prefetches << endl; - out << description << "_total_hw_prefetches: " << m_hw_prefetches << endl; - out << endl; - - int requests = 0; - - for(int i=0; i<int(CacheRequestType_NUM); i++) { - requests += m_requestTypeVec_ptr->ref(i); - } - - assert(m_misses == requests); - - if (requests > 0) { - for(int i=0; i<int(CacheRequestType_NUM); i++){ - if (m_requestTypeVec_ptr->ref(i) > 0) { - out << description << "_request_type_" << CacheRequestType_to_string(CacheRequestType(i)) << ": " - << (100.0 * double((m_requestTypeVec_ptr->ref(i)))) / double(requests) - << "%" << endl; - } - } - + out << "Cache Stats: " << m_description << endl; + string description = " " + m_description; + + out << description << "_total_misses: " << m_misses << endl; + out << description << "_total_demand_misses: " << m_demand_misses << endl; + out << description << "_total_prefetches: " << m_prefetches << endl; + out << description << "_total_sw_prefetches: " << m_sw_prefetches << endl; + out << description << "_total_hw_prefetches: " << m_hw_prefetches << endl; out << endl; - for(int i=0; i<AccessModeType_NUM; i++){ - if (m_accessModeTypeHistogram[i] > 0) { - out << description << "_access_mode_type_" << (AccessModeType) i << ": " << m_accessModeTypeHistogram[i] - << " " << (100.0 * m_accessModeTypeHistogram[i]) / requests << "%" << endl; - } + int requests = 0; + + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + requests += m_requestTypeVec_ptr->ref(i); } - } - out << description << "_request_size: " << m_requestSize << endl; - out << endl; + assert(m_misses == requests); + + if (requests > 0) { + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + if (m_requestTypeVec_ptr->ref(i) > 0) { + out << description << "_request_type_" + << CacheRequestType_to_string(CacheRequestType(i)) + << ": " + << 100.0 * (double)m_requestTypeVec_ptr->ref(i) / + (double)requests + << "%" << endl; + } + } + + out << endl; + + for (int i = 0; i < AccessModeType_NUM; i++){ + if (m_accessModeTypeHistogram[i] > 0) { + out << description << "_access_mode_type_" + << (AccessModeType) i << ": " + << m_accessModeTypeHistogram[i] << " " + << 100.0 * m_accessModeTypeHistogram[i] / requests + << "%" << endl; + } + } + } + out << description << "_request_size: " << m_requestSize << endl; + out << endl; } -void CacheProfiler::clearStats() +void +CacheProfiler::clearStats() { - for(int i=0; i<int(CacheRequestType_NUM); i++) { - m_requestTypeVec_ptr->ref(i) = 0; - } - m_requestSize.clear(); - m_misses = 0; - m_demand_misses = 0; - m_prefetches = 0; - m_sw_prefetches = 0; - m_hw_prefetches = 0; - for(int i=0; i<AccessModeType_NUM; i++){ - m_accessModeTypeHistogram[i] = 0; - } + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + m_requestTypeVec_ptr->ref(i) = 0; + } + m_requestSize.clear(); + m_misses = 0; + m_demand_misses = 0; + m_prefetches = 0; + m_sw_prefetches = 0; + m_hw_prefetches = 0; + for (int i = 0; i < AccessModeType_NUM; i++) { + m_accessModeTypeHistogram[i] = 0; + } } -void CacheProfiler::addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit) +void +CacheProfiler::addStatSample(CacheRequestType requestType, + AccessModeType type, int msgSize, + PrefetchBit pfBit) { - m_misses++; - - m_requestTypeVec_ptr->ref(requestType)++; - - m_accessModeTypeHistogram[type]++; - m_requestSize.add(msgSize); - if (pfBit == PrefetchBit_No) { - m_demand_misses++; - } else if (pfBit == PrefetchBit_Yes) { - m_prefetches++; - m_sw_prefetches++; - } else { // must be L1_HW || L2_HW prefetch - m_prefetches++; - m_hw_prefetches++; - } + m_misses++; + + m_requestTypeVec_ptr->ref(requestType)++; + + m_accessModeTypeHistogram[type]++; + m_requestSize.add(msgSize); + if (pfBit == PrefetchBit_No) { + m_demand_misses++; + } else if (pfBit == PrefetchBit_Yes) { + m_prefetches++; + m_sw_prefetches++; + } else { + // must be L1_HW || L2_HW prefetch + m_prefetches++; + m_hw_prefetches++; + } } diff --git a/src/mem/ruby/profiler/CacheProfiler.hh b/src/mem/ruby/profiler/CacheProfiler.hh index 11f189148..7dcdf57f0 100644 --- a/src/mem/ruby/profiler/CacheProfiler.hh +++ b/src/mem/ruby/profiler/CacheProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,77 +26,58 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * CacheProfiler.hh - * - * Description: - * - * $Id$ - * - */ - -#ifndef CACHEPROFILER_H -#define CACHEPROFILER_H +#ifndef __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ +#define __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ #include <iostream> #include <string> -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/common/Histogram.hh" #include "mem/protocol/AccessModeType.hh" -#include "mem/protocol/PrefetchBit.hh" #include "mem/protocol/CacheRequestType.hh" +#include "mem/protocol/PrefetchBit.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/system/NodeID.hh" template <class TYPE> class Vector; -class CacheProfiler { -public: - // Constructors - CacheProfiler(const std::string& description); - - // Destructor - ~CacheProfiler(); +class CacheProfiler +{ + public: + CacheProfiler(const std::string& description); + ~CacheProfiler(); - // Public Methods - void printStats(std::ostream& out) const; - void clearStats(); + void printStats(std::ostream& out) const; + void clearStats(); - void addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit); + void addStatSample(CacheRequestType requestType, AccessModeType type, + int msgSize, PrefetchBit pfBit); - void print(std::ostream& out) const; -private: - // Private Methods + void print(std::ostream& out) const; - // Private copy constructor and assignment operator - CacheProfiler(const CacheProfiler& obj); - CacheProfiler& operator=(const CacheProfiler& obj); + private: + // Private copy constructor and assignment operator + CacheProfiler(const CacheProfiler& obj); + CacheProfiler& operator=(const CacheProfiler& obj); - // Data Members (m_ prefix) - std::string m_description; - Histogram m_requestSize; - int64 m_misses; - int64 m_demand_misses; - int64 m_prefetches; - int64 m_sw_prefetches; - int64 m_hw_prefetches; - int64 m_accessModeTypeHistogram[AccessModeType_NUM]; + std::string m_description; + Histogram m_requestSize; + int64 m_misses; + int64 m_demand_misses; + int64 m_prefetches; + int64 m_sw_prefetches; + int64 m_hw_prefetches; + int64 m_accessModeTypeHistogram[AccessModeType_NUM]; - Vector < int >* m_requestTypeVec_ptr; + Vector <int>* m_requestTypeVec_ptr; }; -// Output operator declaration -std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj) +inline std::ostream& +operator<<(std::ostream& out, const CacheProfiler& obj) { - obj.print(out); - out << std::flush; - return out; + obj.print(out); + out << std::flush; + return out; } -#endif //CACHEPROFILER_H +#endif // __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ diff --git a/src/mem/ruby/profiler/MemCntrlProfiler.cc b/src/mem/ruby/profiler/MemCntrlProfiler.cc index b41d7de78..e25719666 100644 --- a/src/mem/ruby/profiler/MemCntrlProfiler.cc +++ b/src/mem/ruby/profiler/MemCntrlProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -32,19 +31,14 @@ using namespace std; MemCntrlProfiler::MemCntrlProfiler(const string& description, - int banks_per_rank, - int ranks_per_dimm, - int dimms_per_channel) + int banks_per_rank, int ranks_per_dimm, int dimms_per_channel) { m_description = description; m_banks_per_rank = banks_per_rank; m_ranks_per_dimm = ranks_per_dimm; m_dimms_per_channel = dimms_per_channel; - int totalBanks = banks_per_rank * - ranks_per_dimm * - dimms_per_channel; - + int totalBanks = banks_per_rank * ranks_per_dimm * dimms_per_channel; m_memBankCount.setSize(totalBanks); clearStats(); @@ -54,50 +48,65 @@ MemCntrlProfiler::~MemCntrlProfiler() { } -void MemCntrlProfiler::printStats(ostream& out) const -{ - if (m_memReq || m_memRefresh) { // if there's a memory controller at all - uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; - double stallsPerReq = total_stalls * 1.0 / m_memReq; - out << "Memory controller: " << m_description << ":" << endl; - out << " memory_total_requests: " << m_memReq << endl; // does not include refreshes - out << " memory_reads: " << m_memRead << endl; - out << " memory_writes: " << m_memWrite << endl; - out << " memory_refreshes: " << m_memRefresh << endl; - out << " memory_total_request_delays: " << total_stalls << endl; - out << " memory_delays_per_request: " << stallsPerReq << endl; - out << " memory_delays_in_input_queue: " << m_memInputQ << endl; - out << " memory_delays_behind_head_of_bank_queue: " << m_memBankQ << endl; - out << " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles << endl; - // Note: The following "memory stalls" entries are a breakdown of the - // cycles which already showed up in m_memWaitCycles. The order is - // significant; it is the priority of attributing the cycles. - // For example, bank_busy is before arbitration because if the bank was - // busy, we didn't even check arbitration. - // Note: "not old enough" means that since we grouped waiting heads-of-queues - // into batches to avoid starvation, a request in a newer batch - // didn't try to arbitrate yet because there are older requests waiting. - out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl; - out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl; - out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl; - out << " memory_stalls_for_arbitration: " << m_memArbWait << endl; - out << " memory_stalls_for_bus: " << m_memBusBusy << endl; - out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl; - out << " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy << endl; - out << " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy << endl; - out << " accesses_per_bank: "; - for (int bank=0; bank < m_memBankCount.size(); bank++) { - out << m_memBankCount[bank] << " "; - } - } else { +void +MemCntrlProfiler::printStats(ostream& out) const +{ + if (!m_memReq && !m_memRefresh) { out << "Memory Controller: " << m_description - << " no stats recorded." << endl; - } + << " no stats recorded." << endl + << endl + << endl; + return; + } + + // if there's a memory controller at all + uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; + double stallsPerReq = total_stalls * 1.0 / m_memReq; + out << "Memory controller: " << m_description << ":" << endl; + + // does not include refreshes + out << " memory_total_requests: " << m_memReq << endl; + out << " memory_reads: " << m_memRead << endl; + out << " memory_writes: " << m_memWrite << endl; + out << " memory_refreshes: " << m_memRefresh << endl; + out << " memory_total_request_delays: " << total_stalls << endl; + out << " memory_delays_per_request: " << stallsPerReq << endl; + out << " memory_delays_in_input_queue: " << m_memInputQ << endl; + out << " memory_delays_behind_head_of_bank_queue: " + << m_memBankQ << endl; + out << " memory_delays_stalled_at_head_of_bank_queue: " + << m_memWaitCycles << endl; + + // Note: The following "memory stalls" entries are a breakdown of + // the cycles which already showed up in m_memWaitCycles. The + // order is significant; it is the priority of attributing the + // cycles. For example, bank_busy is before arbitration because + // if the bank was busy, we didn't even check arbitration. + // Note: "not old enough" means that since we grouped waiting + // heads-of-queues into batches to avoid starvation, a request in + // a newer batch didn't try to arbitrate yet because there are + // older requests waiting. + out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl; + out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl; + out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl; + out << " memory_stalls_for_arbitration: " << m_memArbWait << endl; + out << " memory_stalls_for_bus: " << m_memBusBusy << endl; + out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl; + out << " memory_stalls_for_read_write_turnaround: " + << m_memReadWriteBusy << endl; + out << " memory_stalls_for_read_read_turnaround: " + << m_memDataBusBusy << endl; + out << " accesses_per_bank: "; + + for (int bank = 0; bank < m_memBankCount.size(); bank++) { + out << m_memBankCount[bank] << " "; + } out << endl; out << endl; } -void MemCntrlProfiler::clearStats() +void +MemCntrlProfiler::clearStats() { m_memReq = 0; m_memBankBusy = 0; @@ -115,72 +124,100 @@ void MemCntrlProfiler::clearStats() m_memRandBusy = 0; m_memNotOld = 0; - for (int bank=0; - bank < m_memBankCount.size(); - bank++) { + for (int bank = 0; bank < m_memBankCount.size(); bank++) { m_memBankCount[bank] = 0; } } -void MemCntrlProfiler::profileMemReq(int bank) { - m_memReq++; - m_memBankCount[bank]++; +void +MemCntrlProfiler::profileMemReq(int bank) +{ + m_memReq++; + m_memBankCount[bank]++; } -void MemCntrlProfiler::profileMemBankBusy() { - m_memBankBusy++; +void +MemCntrlProfiler::profileMemBankBusy() +{ + m_memBankBusy++; } -void MemCntrlProfiler::profileMemBusBusy() { - m_memBusBusy++; +void +MemCntrlProfiler::profileMemBusBusy() +{ + m_memBusBusy++; } -void MemCntrlProfiler::profileMemReadWriteBusy() { - m_memReadWriteBusy++; +void +MemCntrlProfiler::profileMemReadWriteBusy() +{ + m_memReadWriteBusy++; } -void MemCntrlProfiler::profileMemDataBusBusy() { - m_memDataBusBusy++; +void +MemCntrlProfiler::profileMemDataBusBusy() +{ + m_memDataBusBusy++; } -void MemCntrlProfiler::profileMemTfawBusy() { - m_memTfawBusy++; +void +MemCntrlProfiler::profileMemTfawBusy() +{ + m_memTfawBusy++; } -void MemCntrlProfiler::profileMemRefresh() { - m_memRefresh++; +void +MemCntrlProfiler::profileMemRefresh() +{ + m_memRefresh++; } -void MemCntrlProfiler::profileMemRead() { - m_memRead++; +void +MemCntrlProfiler::profileMemRead() +{ + m_memRead++; } -void MemCntrlProfiler::profileMemWrite() { - m_memWrite++; +void +MemCntrlProfiler::profileMemWrite() +{ + m_memWrite++; } -void MemCntrlProfiler::profileMemWaitCycles(int cycles) { - m_memWaitCycles += cycles; +void +MemCntrlProfiler::profileMemWaitCycles(int cycles) +{ + m_memWaitCycles += cycles; } -void MemCntrlProfiler::profileMemInputQ(int cycles) { - m_memInputQ += cycles; +void +MemCntrlProfiler::profileMemInputQ(int cycles) +{ + m_memInputQ += cycles; } -void MemCntrlProfiler::profileMemBankQ(int cycles) { - m_memBankQ += cycles; +void +MemCntrlProfiler::profileMemBankQ(int cycles) +{ + m_memBankQ += cycles; } -void MemCntrlProfiler::profileMemArbWait(int cycles) { - m_memArbWait += cycles; +void +MemCntrlProfiler::profileMemArbWait(int cycles) +{ + m_memArbWait += cycles; } -void MemCntrlProfiler::profileMemRandBusy() { - m_memRandBusy++; +void +MemCntrlProfiler::profileMemRandBusy() +{ + m_memRandBusy++; } -void MemCntrlProfiler::profileMemNotOld() { - m_memNotOld++; +void +MemCntrlProfiler::profileMemNotOld() +{ + m_memNotOld++; } diff --git a/src/mem/ruby/profiler/MemCntrlProfiler.hh b/src/mem/ruby/profiler/MemCntrlProfiler.hh index ebedd5185..85c39e0ad 100644 --- a/src/mem/ruby/profiler/MemCntrlProfiler.hh +++ b/src/mem/ruby/profiler/MemCntrlProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,17 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * MemCntrlProfiler.hh - * - * Description: - * - * $Id$ - * - */ - -#ifndef MEM_CNTRL_PROFILER_H -#define MEM_CNTRL_PROFILER_H +#ifndef __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ +#define __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ #include <iostream> #include <string> @@ -47,80 +37,67 @@ template <class TYPE> class Vector; -class MemCntrlProfiler { -public: - // Constructors - MemCntrlProfiler(const std::string& description, - int banks_per_rank, - int ranks_per_dimm, - int dimms_per_channel); - - // Destructor - ~MemCntrlProfiler(); - - // Public Methods - void printStats(std::ostream& out) const; - void clearStats(); - - void profileMemReq(int bank); - void profileMemBankBusy(); - void profileMemBusBusy(); - void profileMemTfawBusy(); - void profileMemReadWriteBusy(); - void profileMemDataBusBusy(); - void profileMemRefresh(); - void profileMemRead(); - void profileMemWrite(); - void profileMemWaitCycles(int cycles); - void profileMemInputQ(int cycles); - void profileMemBankQ(int cycles); - void profileMemArbWait(int cycles); - void profileMemRandBusy(); - void profileMemNotOld(); +class MemCntrlProfiler +{ + public: + MemCntrlProfiler(const std::string& description, int banks_per_rank, + int ranks_per_dimm, int dimms_per_channel); + ~MemCntrlProfiler(); + + void printStats(std::ostream& out) const; + void clearStats(); + + void profileMemReq(int bank); + void profileMemBankBusy(); + void profileMemBusBusy(); + void profileMemTfawBusy(); + void profileMemReadWriteBusy(); + void profileMemDataBusBusy(); + void profileMemRefresh(); + void profileMemRead(); + void profileMemWrite(); + void profileMemWaitCycles(int cycles); + void profileMemInputQ(int cycles); + void profileMemBankQ(int cycles); + void profileMemArbWait(int cycles); + void profileMemRandBusy(); + void profileMemNotOld(); + + void print(std::ostream& out) const; - void print(std::ostream& out) const; private: - // Private Methods - - // Private copy constructor and assignment operator - MemCntrlProfiler(const MemCntrlProfiler& obj); - MemCntrlProfiler& operator=(const MemCntrlProfiler& obj); - - // Data Members (m_ prefix) - std::string m_description; - uint64 m_memReq; - uint64 m_memBankBusy; - uint64 m_memBusBusy; - uint64 m_memTfawBusy; - uint64 m_memReadWriteBusy; - uint64 m_memDataBusBusy; - uint64 m_memRefresh; - uint64 m_memRead; - uint64 m_memWrite; - uint64 m_memWaitCycles; - uint64 m_memInputQ; - uint64 m_memBankQ; - uint64 m_memArbWait; - uint64 m_memRandBusy; - uint64 m_memNotOld; - Vector<uint64> m_memBankCount; - int m_banks_per_rank; - int m_ranks_per_dimm; - int m_dimms_per_channel; + // Private copy constructor and assignment operator + MemCntrlProfiler(const MemCntrlProfiler& obj); + MemCntrlProfiler& operator=(const MemCntrlProfiler& obj); + + std::string m_description; + uint64 m_memReq; + uint64 m_memBankBusy; + uint64 m_memBusBusy; + uint64 m_memTfawBusy; + uint64 m_memReadWriteBusy; + uint64 m_memDataBusBusy; + uint64 m_memRefresh; + uint64 m_memRead; + uint64 m_memWrite; + uint64 m_memWaitCycles; + uint64 m_memInputQ; + uint64 m_memBankQ; + uint64 m_memArbWait; + uint64 m_memRandBusy; + uint64 m_memNotOld; + Vector<uint64> m_memBankCount; + int m_banks_per_rank; + int m_ranks_per_dimm; + int m_dimms_per_channel; }; -// Output operator declaration -std::ostream& operator<<(std::ostream& out, const MemCntrlProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -std::ostream& operator<<(std::ostream& out, const MemCntrlProfiler& obj) +inline std::ostream& +operator<<(std::ostream& out, const MemCntrlProfiler& obj) { - obj.print(out); - out << std::flush; - return out; + obj.print(out); + out << std::flush; + return out; } -#endif //MEM_CNTRL_PROFILER_H +#endif // __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index 365f6cf42..2cc3eddfc 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -42,34 +42,24 @@ ---------------------------------------------------------------------- */ -/* - * Profiler.cc - * - * Description: See Profiler.hh - * - * $Id$ - * - */ - // Allows use of times() library call, which determines virtual runtime #include <sys/resource.h> #include <sys/times.h> -#include "mem/ruby/profiler/Profiler.hh" -#include "mem/ruby/profiler/AddressProfiler.hh" -#include "mem/ruby/system/System.hh" -#include "mem/ruby/network/Network.hh" +#include "mem/gems_common/Map.hh" #include "mem/gems_common/PrioHeap.hh" +#include "mem/gems_common/util.hh" #include "mem/protocol/CacheMsg.hh" +#include "mem/protocol/MachineType.hh" #include "mem/protocol/Protocol.hh" -#include "mem/gems_common/util.hh" -#include "mem/gems_common/Map.hh" #include "mem/ruby/common/Debug.hh" -#include "mem/protocol/MachineType.hh" - +#include "mem/ruby/network/Network.hh" +#include "mem/ruby/profiler/AddressProfiler.hh" +#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" #include "mem/ruby/system/System.hh" -extern std::ostream * debug_cout_ptr; +extern std::ostream* debug_cout_ptr; static double process_memory_total(); static double process_memory_resident(); @@ -77,570 +67,623 @@ static double process_memory_resident(); Profiler::Profiler(const Params *p) : SimObject(p) { - m_requestProfileMap_ptr = new Map<string, int>; + m_requestProfileMap_ptr = new Map<string, int>; - m_inst_profiler_ptr = NULL; - m_address_profiler_ptr = NULL; + m_inst_profiler_ptr = NULL; + m_address_profiler_ptr = NULL; - m_real_time_start_time = time(NULL); // Not reset in clearStats() - m_stats_period = 1000000; // Default - m_periodic_output_file_ptr = &cerr; + m_real_time_start_time = time(NULL); // Not reset in clearStats() + m_stats_period = 1000000; // Default + m_periodic_output_file_ptr = &cerr; - m_hot_lines = p->hot_lines; - m_all_instructions = p->all_instructions; + m_hot_lines = p->hot_lines; + m_all_instructions = p->all_instructions; - m_num_of_sequencers = p->num_of_sequencers; + m_num_of_sequencers = p->num_of_sequencers; - m_hot_lines = false; - m_all_instructions = false; + m_hot_lines = false; + m_all_instructions = false; - m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); - m_address_profiler_ptr -> setHotLines(m_hot_lines); - m_address_profiler_ptr -> setAllInstructions(m_all_instructions); + m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_address_profiler_ptr->setHotLines(m_hot_lines); + m_address_profiler_ptr->setAllInstructions(m_all_instructions); - if (m_all_instructions) { - m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); - m_inst_profiler_ptr -> setHotLines(m_hot_lines); - m_inst_profiler_ptr -> setAllInstructions(m_all_instructions); - } + if (m_all_instructions) { + m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_inst_profiler_ptr->setHotLines(m_hot_lines); + m_inst_profiler_ptr->setAllInstructions(m_all_instructions); + } } Profiler::~Profiler() { - if (m_periodic_output_file_ptr != &cerr) { - delete m_periodic_output_file_ptr; - } + if (m_periodic_output_file_ptr != &cerr) { + delete m_periodic_output_file_ptr; + } - delete m_requestProfileMap_ptr; + delete m_requestProfileMap_ptr; } -void Profiler::wakeup() +void +Profiler::wakeup() { - // FIXME - avoid the repeated code - - Vector<integer_t> perProcCycleCount; - perProcCycleCount.setSize(m_num_of_sequencers); + // FIXME - avoid the repeated code - for(int i=0; i < m_num_of_sequencers; i++) { - perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; - // The +1 allows us to avoid division by zero - } + Vector<integer_t> perProcCycleCount; + perProcCycleCount.setSize(m_num_of_sequencers); - (*m_periodic_output_file_ptr) << "ruby_cycles: " - << g_eventQueue_ptr->getTime()-m_ruby_start - << endl; + for (int i = 0; i < m_num_of_sequencers; i++) { + perProcCycleCount[i] = + g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; + // The +1 allows us to avoid division by zero + } - (*m_periodic_output_file_ptr) << "mbytes_resident: " - << process_memory_resident() - << endl; + ostream &out = *m_periodic_output_file_ptr; - (*m_periodic_output_file_ptr) << "mbytes_total: " - << process_memory_total() - << endl; + out << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl + << "mbytes_resident: " << process_memory_resident() << endl + << "mbytes_total: " << process_memory_total() << endl; - if (process_memory_total() > 0) { - (*m_periodic_output_file_ptr) << "resident_ratio: " - << process_memory_resident()/process_memory_total() - << endl; - } + if (process_memory_total() > 0) { + out << "resident_ratio: " + << process_memory_resident() / process_memory_total() << endl; + } - (*m_periodic_output_file_ptr) << "miss_latency: " - << m_allMissLatencyHistogram - << endl; + out << "miss_latency: " << m_allMissLatencyHistogram << endl; - *m_periodic_output_file_ptr << endl; + out << endl; - if (m_all_instructions) { - m_inst_profiler_ptr->printStats(*m_periodic_output_file_ptr); - } + if (m_all_instructions) { + m_inst_profiler_ptr->printStats(out); + } - //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr); - g_eventQueue_ptr->scheduleEvent(this, m_stats_period); + //g_system_ptr->getNetwork()->printStats(out); + g_eventQueue_ptr->scheduleEvent(this, m_stats_period); } -void Profiler::setPeriodicStatsFile(const string& filename) +void +Profiler::setPeriodicStatsFile(const string& filename) { - cout << "Recording periodic statistics to file '" << filename << "' every " - << m_stats_period << " Ruby cycles" << endl; + cout << "Recording periodic statistics to file '" << filename << "' every " + << m_stats_period << " Ruby cycles" << endl; - if (m_periodic_output_file_ptr != &cerr) { - delete m_periodic_output_file_ptr; - } + if (m_periodic_output_file_ptr != &cerr) { + delete m_periodic_output_file_ptr; + } - m_periodic_output_file_ptr = new ofstream(filename.c_str()); - g_eventQueue_ptr->scheduleEvent(this, 1); + m_periodic_output_file_ptr = new ofstream(filename.c_str()); + g_eventQueue_ptr->scheduleEvent(this, 1); } -void Profiler::setPeriodicStatsInterval(integer_t period) +void +Profiler::setPeriodicStatsInterval(integer_t period) { - cout << "Recording periodic statistics every " << m_stats_period - << " Ruby cycles" << endl; + cout << "Recording periodic statistics every " << m_stats_period + << " Ruby cycles" << endl; - m_stats_period = period; - g_eventQueue_ptr->scheduleEvent(this, 1); + m_stats_period = period; + g_eventQueue_ptr->scheduleEvent(this, 1); } -void Profiler::printConfig(ostream& out) const +void +Profiler::printConfig(ostream& out) const { - out << endl; - out << "Profiler Configuration" << endl; - out << "----------------------" << endl; - out << "periodic_stats_period: " << m_stats_period << endl; + out << endl; + out << "Profiler Configuration" << endl; + out << "----------------------" << endl; + out << "periodic_stats_period: " << m_stats_period << endl; } -void Profiler::print(ostream& out) const +void +Profiler::print(ostream& out) const { - out << "[Profiler]"; + out << "[Profiler]"; } -void Profiler::printStats(ostream& out, bool short_stats) +void +Profiler::printStats(ostream& out, bool short_stats) { - out << endl; - if (short_stats) { - out << "SHORT "; - } - out << "Profiler Stats" << endl; - out << "--------------" << endl; - - time_t real_time_current = time(NULL); - double seconds = difftime(real_time_current, m_real_time_start_time); - double minutes = seconds/60.0; - double hours = minutes/60.0; - double days = hours/24.0; - Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start; - - if (!short_stats) { - out << "Elapsed_time_in_seconds: " << seconds << endl; - out << "Elapsed_time_in_minutes: " << minutes << endl; - out << "Elapsed_time_in_hours: " << hours << endl; - out << "Elapsed_time_in_days: " << days << endl; out << endl; - } - - // print the virtual runtimes as well - struct tms vtime; - times(&vtime); - seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; - minutes = seconds / 60.0; - hours = minutes / 60.0; - days = hours / 24.0; - out << "Virtual_time_in_seconds: " << seconds << endl; - out << "Virtual_time_in_minutes: " << minutes << endl; - out << "Virtual_time_in_hours: " << hours << endl; - out << "Virtual_time_in_days: " << days << endl; - out << endl; - - out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; - out << "Ruby_start_time: " << m_ruby_start << endl; - out << "Ruby_cycles: " << ruby_cycles << endl; - out << endl; - - if (!short_stats) { - out << "mbytes_resident: " << process_memory_resident() << endl; - out << "mbytes_total: " << process_memory_total() << endl; - if (process_memory_total() > 0) { - out << "resident_ratio: " - << process_memory_resident()/process_memory_total() << endl; + if (short_stats) { + out << "SHORT "; + } + out << "Profiler Stats" << endl; + out << "--------------" << endl; + + time_t real_time_current = time(NULL); + double seconds = difftime(real_time_current, m_real_time_start_time); + double minutes = seconds / 60.0; + double hours = minutes / 60.0; + double days = hours / 24.0; + Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start; + + if (!short_stats) { + out << "Elapsed_time_in_seconds: " << seconds << endl; + out << "Elapsed_time_in_minutes: " << minutes << endl; + out << "Elapsed_time_in_hours: " << hours << endl; + out << "Elapsed_time_in_days: " << days << endl; + out << endl; } + + // print the virtual runtimes as well + struct tms vtime; + times(&vtime); + seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; + minutes = seconds / 60.0; + hours = minutes / 60.0; + days = hours / 24.0; + out << "Virtual_time_in_seconds: " << seconds << endl; + out << "Virtual_time_in_minutes: " << minutes << endl; + out << "Virtual_time_in_hours: " << hours << endl; + out << "Virtual_time_in_days: " << days << endl; out << endl; - } + out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; + out << "Ruby_start_time: " << m_ruby_start << endl; + out << "Ruby_cycles: " << ruby_cycles << endl; + out << endl; + + if (!short_stats) { + out << "mbytes_resident: " << process_memory_resident() << endl; + out << "mbytes_total: " << process_memory_total() << endl; + if (process_memory_total() > 0) { + out << "resident_ratio: " + << process_memory_resident()/process_memory_total() << endl; + } + out << endl; + } - Vector<integer_t> perProcCycleCount; - perProcCycleCount.setSize(m_num_of_sequencers); + Vector<integer_t> perProcCycleCount; + perProcCycleCount.setSize(m_num_of_sequencers); - for(int i=0; i < m_num_of_sequencers; i++) { - perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; - // The +1 allows us to avoid division by zero - } + for (int i = 0; i < m_num_of_sequencers; i++) { + perProcCycleCount[i] = + g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; + // The +1 allows us to avoid division by zero + } - out << "ruby_cycles_executed: " << perProcCycleCount << endl; + out << "ruby_cycles_executed: " << perProcCycleCount << endl; - out << endl; + out << endl; - if (!short_stats) { - out << "Busy Controller Counts:" << endl; - for(int i=0; i < MachineType_NUM; i++) { - for(int j=0; j < MachineType_base_count((MachineType)i); j++) { - MachineID machID; - machID.type = (MachineType)i; - machID.num = j; - out << machID << ":" << m_busyControllerCount[i][j] << " "; - if ((j+1)%8 == 0) { - out << endl; + if (!short_stats) { + out << "Busy Controller Counts:" << endl; + for (int i = 0; i < MachineType_NUM; i++) { + int size = MachineType_base_count((MachineType)i); + for (int j = 0; j < size; j++) { + MachineID machID; + machID.type = (MachineType)i; + machID.num = j; + out << machID << ":" << m_busyControllerCount[i][j] << " "; + if ((j + 1) % 8 == 0) { + out << endl; + } + } + out << endl; } - } - out << endl; + out << endl; + + out << "Busy Bank Count:" << m_busyBankCount << endl; + out << endl; + + out << "sequencer_requests_outstanding: " + << m_sequencer_requests << endl; + out << endl; } - out << endl; - out << "Busy Bank Count:" << m_busyBankCount << endl; - out << endl; + if (!short_stats) { + out << "All Non-Zero Cycle Demand Cache Accesses" << endl; + out << "----------------------------------------" << endl; + out << "miss_latency: " << m_allMissLatencyHistogram << endl; + for (int i = 0; i < m_missLatencyHistograms.size(); i++) { + if (m_missLatencyHistograms[i].size() > 0) { + out << "miss_latency_" << RubyRequestType(i) << ": " + << m_missLatencyHistograms[i] << endl; + } + } + for (int i = 0; i < m_machLatencyHistograms.size(); i++) { + if (m_machLatencyHistograms[i].size() > 0) { + out << "miss_latency_" << GenericMachineType(i) << ": " + << m_machLatencyHistograms[i] << endl; + } + } - out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl; - out << endl; - } + out << endl; - if (!short_stats) { - out << "All Non-Zero Cycle Demand Cache Accesses" << endl; - out << "----------------------------------------" << endl; - out << "miss_latency: " << m_allMissLatencyHistogram << endl; - for(int i=0; i<m_missLatencyHistograms.size(); i++) { - if (m_missLatencyHistograms[i].size() > 0) { - out << "miss_latency_" << RubyRequestType(i) << ": " << m_missLatencyHistograms[i] << endl; - } + out << "All Non-Zero Cycle SW Prefetch Requests" << endl; + out << "------------------------------------" << endl; + out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl; + for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) { + if (m_SWPrefetchLatencyHistograms[i].size() > 0) { + out << "prefetch_latency_" << CacheRequestType(i) << ": " + << m_SWPrefetchLatencyHistograms[i] << endl; + } + } + for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) { + if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) { + out << "prefetch_latency_" << GenericMachineType(i) << ": " + << m_SWPrefetchMachLatencyHistograms[i] << endl; + } + } + out << "prefetch_latency_L2Miss:" + << m_SWPrefetchL2MissLatencyHistogram << endl; + + if (m_all_sharing_histogram.size() > 0) { + out << "all_sharing: " << m_all_sharing_histogram << endl; + out << "read_sharing: " << m_read_sharing_histogram << endl; + out << "write_sharing: " << m_write_sharing_histogram << endl; + + out << "all_sharing_percent: "; + m_all_sharing_histogram.printPercent(out); + out << endl; + + out << "read_sharing_percent: "; + m_read_sharing_histogram.printPercent(out); + out << endl; + + out << "write_sharing_percent: "; + m_write_sharing_histogram.printPercent(out); + out << endl; + + int64 total_miss = m_cache_to_cache + m_memory_to_cache; + out << "all_misses: " << total_miss << endl; + out << "cache_to_cache_misses: " << m_cache_to_cache << endl; + out << "memory_to_cache_misses: " << m_memory_to_cache << endl; + out << "cache_to_cache_percent: " + << 100.0 * (double(m_cache_to_cache) / double(total_miss)) + << endl; + out << "memory_to_cache_percent: " + << 100.0 * (double(m_memory_to_cache) / double(total_miss)) + << endl; + out << endl; + } + + if (m_outstanding_requests.size() > 0) { + out << "outstanding_requests: "; + m_outstanding_requests.printPercent(out); + out << endl; + out << endl; + } } - for(int i=0; i<m_machLatencyHistograms.size(); i++) { - if (m_machLatencyHistograms[i].size() > 0) { - out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl; - } + + if (!short_stats) { + out << "Request vs. RubySystem State Profile" << endl; + out << "--------------------------------" << endl; + out << endl; + + Vector<string> requestProfileKeys = m_requestProfileMap_ptr->keys(); + requestProfileKeys.sortVector(); + + for (int i = 0; i < requestProfileKeys.size(); i++) { + int temp_int = + m_requestProfileMap_ptr->lookup(requestProfileKeys[i]); + double percent = (100.0 * double(temp_int)) / double(m_requests); + while (requestProfileKeys[i] != "") { + out << setw(10) << string_split(requestProfileKeys[i], ':'); + } + out << setw(11) << temp_int; + out << setw(14) << percent << endl; + } + out << endl; + + out << "filter_action: " << m_filter_action_histogram << endl; + + if (!m_all_instructions) { + m_address_profiler_ptr->printStats(out); + } + + if (m_all_instructions) { + m_inst_profiler_ptr->printStats(out); + } + + out << endl; + out << "Message Delayed Cycles" << endl; + out << "----------------------" << endl; + out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; + out << "Total_nonPF_delay_cycles: " + << m_delayedCyclesNonPFHistogram << endl; + for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { + out << " virtual_network_" << i << "_delay_cycles: " + << m_delayedCyclesVCHistograms[i] << endl; + } + + printResourceUsage(out); } +} +void +Profiler::printResourceUsage(ostream& out) const +{ out << endl; + out << "Resource Usage" << endl; + out << "--------------" << endl; + + integer_t pagesize = getpagesize(); // page size in bytes + out << "page_size: " << pagesize << endl; + + rusage usage; + getrusage (RUSAGE_SELF, &usage); + + out << "user_time: " << usage.ru_utime.tv_sec << endl; + out << "system_time: " << usage.ru_stime.tv_sec << endl; + out << "page_reclaims: " << usage.ru_minflt << endl; + out << "page_faults: " << usage.ru_majflt << endl; + out << "swaps: " << usage.ru_nswap << endl; + out << "block_inputs: " << usage.ru_inblock << endl; + out << "block_outputs: " << usage.ru_oublock << endl; +} - out << "All Non-Zero Cycle SW Prefetch Requests" << endl; - out << "------------------------------------" << endl; - out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl; - for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) { - if (m_SWPrefetchLatencyHistograms[i].size() > 0) { - out << "prefetch_latency_" << CacheRequestType(i) << ": " << m_SWPrefetchLatencyHistograms[i] << endl; - } +void +Profiler::clearStats() +{ + m_ruby_start = g_eventQueue_ptr->getTime(); + + m_cycles_executed_at_start.setSize(m_num_of_sequencers); + for (int i = 0; i < m_num_of_sequencers; i++) { + if (g_system_ptr == NULL) { + m_cycles_executed_at_start[i] = 0; + } else { + m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i); + } } - for(int i=0; i<m_SWPrefetchMachLatencyHistograms.size(); i++) { - if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) { - out << "prefetch_latency_" << GenericMachineType(i) << ": " << m_SWPrefetchMachLatencyHistograms[i] << endl; - } + + m_busyControllerCount.setSize(MachineType_NUM); // all machines + for (int i = 0; i < MachineType_NUM; i++) { + int size = MachineType_base_count((MachineType)i); + m_busyControllerCount[i].setSize(size); + for (int j = 0; j < size; j++) { + m_busyControllerCount[i][j] = 0; + } } - out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl; - - if (m_all_sharing_histogram.size() > 0) { - out << "all_sharing: " << m_all_sharing_histogram << endl; - out << "read_sharing: " << m_read_sharing_histogram << endl; - out << "write_sharing: " << m_write_sharing_histogram << endl; - - out << "all_sharing_percent: "; m_all_sharing_histogram.printPercent(out); out << endl; - out << "read_sharing_percent: "; m_read_sharing_histogram.printPercent(out); out << endl; - out << "write_sharing_percent: "; m_write_sharing_histogram.printPercent(out); out << endl; - - int64 total_miss = m_cache_to_cache + m_memory_to_cache; - out << "all_misses: " << total_miss << endl; - out << "cache_to_cache_misses: " << m_cache_to_cache << endl; - out << "memory_to_cache_misses: " << m_memory_to_cache << endl; - out << "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache) / double(total_miss)) << endl; - out << "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache) / double(total_miss)) << endl; - out << endl; + m_busyBankCount = 0; + + m_delayedCyclesHistogram.clear(); + m_delayedCyclesNonPFHistogram.clear(); + int size = RubySystem::getNetwork()->getNumberOfVirtualNetworks(); + m_delayedCyclesVCHistograms.setSize(size); + for (int i = 0; i < size; i++) { + m_delayedCyclesVCHistograms[i].clear(); } - if (m_outstanding_requests.size() > 0) { - out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl; - out << endl; + m_missLatencyHistograms.setSize(RubyRequestType_NUM); + for (int i = 0; i < m_missLatencyHistograms.size(); i++) { + m_missLatencyHistograms[i].clear(200); } - } - - if (!short_stats) { - out << "Request vs. RubySystem State Profile" << endl; - out << "--------------------------------" << endl; - out << endl; + m_machLatencyHistograms.setSize(GenericMachineType_NUM+1); + for (int i = 0; i < m_machLatencyHistograms.size(); i++) { + m_machLatencyHistograms[i].clear(200); + } + m_allMissLatencyHistogram.clear(200); - Vector<string> requestProfileKeys = m_requestProfileMap_ptr->keys(); - requestProfileKeys.sortVector(); - - for(int i=0; i<requestProfileKeys.size(); i++) { - int temp_int = m_requestProfileMap_ptr->lookup(requestProfileKeys[i]); - double percent = (100.0*double(temp_int))/double(m_requests); - while (requestProfileKeys[i] != "") { - out << setw(10) << string_split(requestProfileKeys[i], ':'); - } - out << setw(11) << temp_int; - out << setw(14) << percent << endl; + m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM); + for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) { + m_SWPrefetchLatencyHistograms[i].clear(200); } - out << endl; + m_SWPrefetchMachLatencyHistograms.setSize(GenericMachineType_NUM+1); + for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) { + m_SWPrefetchMachLatencyHistograms[i].clear(200); + } + m_allSWPrefetchLatencyHistogram.clear(200); - out << "filter_action: " << m_filter_action_histogram << endl; + m_sequencer_requests.clear(); + m_read_sharing_histogram.clear(); + m_write_sharing_histogram.clear(); + m_all_sharing_histogram.clear(); + m_cache_to_cache = 0; + m_memory_to_cache = 0; - if (!m_all_instructions) { - m_address_profiler_ptr->printStats(out); - } + // clear HashMaps + m_requestProfileMap_ptr->clear(); - if (m_all_instructions) { - m_inst_profiler_ptr->printStats(out); - } + // count requests profiled + m_requests = 0; - out << endl; - out << "Message Delayed Cycles" << endl; - out << "----------------------" << endl; - out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; - out << "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram << endl; - for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { - out << " virtual_network_" << i << "_delay_cycles: " << m_delayedCyclesVCHistograms[i] << endl; - } + m_outstanding_requests.clear(); + m_outstanding_persistent_requests.clear(); - printResourceUsage(out); - } + // Flush the prefetches through the system - used so that there + // are no outstanding requests after stats are cleared + //g_eventQueue_ptr->triggerAllEvents(); + // update the start time + m_ruby_start = g_eventQueue_ptr->getTime(); } -void Profiler::printResourceUsage(ostream& out) const +void +Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) { - out << endl; - out << "Resource Usage" << endl; - out << "--------------" << endl; - - integer_t pagesize = getpagesize(); // page size in bytes - out << "page_size: " << pagesize << endl; - - rusage usage; - getrusage (RUSAGE_SELF, &usage); - - out << "user_time: " << usage.ru_utime.tv_sec << endl; - out << "system_time: " << usage.ru_stime.tv_sec << endl; - out << "page_reclaims: " << usage.ru_minflt << endl; - out << "page_faults: " << usage.ru_majflt << endl; - out << "swaps: " << usage.ru_nswap << endl; - out << "block_inputs: " << usage.ru_inblock << endl; - out << "block_outputs: " << usage.ru_oublock << endl; + if (msg.getType() != CacheRequestType_IFETCH) { + // Note: The following line should be commented out if you + // want to use the special profiling that is part of the GS320 + // protocol + + // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be + // profiled by the AddressProfiler + m_address_profiler_ptr-> + addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), + msg.getType(), msg.getAccessMode(), id, false); + } } -void Profiler::clearStats() +void +Profiler::profileSharing(const Address& addr, AccessType type, + NodeID requestor, const Set& sharers, + const Set& owner) { - m_ruby_start = g_eventQueue_ptr->getTime(); + Set set_contacted(owner); + if (type == AccessType_Write) { + set_contacted.addSet(sharers); + } + set_contacted.remove(requestor); + int number_contacted = set_contacted.count(); - m_cycles_executed_at_start.setSize(m_num_of_sequencers); - for (int i=0; i < m_num_of_sequencers; i++) { - if (g_system_ptr == NULL) { - m_cycles_executed_at_start[i] = 0; + if (type == AccessType_Write) { + m_write_sharing_histogram.add(number_contacted); } else { - m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i); + m_read_sharing_histogram.add(number_contacted); } - } + m_all_sharing_histogram.add(number_contacted); - m_busyControllerCount.setSize(MachineType_NUM); // all machines - for(int i=0; i < MachineType_NUM; i++) { - m_busyControllerCount[i].setSize(MachineType_base_count((MachineType)i)); - for(int j=0; j < MachineType_base_count((MachineType)i); j++) { - m_busyControllerCount[i][j] = 0; + if (number_contacted == 0) { + m_memory_to_cache++; + } else { + m_cache_to_cache++; } - } - m_busyBankCount = 0; - - m_delayedCyclesHistogram.clear(); - m_delayedCyclesNonPFHistogram.clear(); - m_delayedCyclesVCHistograms.setSize(RubySystem::getNetwork()->getNumberOfVirtualNetworks()); - for (int i = 0; i < RubySystem::getNetwork()->getNumberOfVirtualNetworks(); i++) { - m_delayedCyclesVCHistograms[i].clear(); - } - - m_missLatencyHistograms.setSize(RubyRequestType_NUM); - for(int i=0; i<m_missLatencyHistograms.size(); i++) { - m_missLatencyHistograms[i].clear(200); - } - m_machLatencyHistograms.setSize(GenericMachineType_NUM+1); - for(int i=0; i<m_machLatencyHistograms.size(); i++) { - m_machLatencyHistograms[i].clear(200); - } - m_allMissLatencyHistogram.clear(200); - - m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM); - for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) { - m_SWPrefetchLatencyHistograms[i].clear(200); - } - m_SWPrefetchMachLatencyHistograms.setSize(GenericMachineType_NUM+1); - for(int i=0; i<m_SWPrefetchMachLatencyHistograms.size(); i++) { - m_SWPrefetchMachLatencyHistograms[i].clear(200); - } - m_allSWPrefetchLatencyHistogram.clear(200); - - m_sequencer_requests.clear(); - m_read_sharing_histogram.clear(); - m_write_sharing_histogram.clear(); - m_all_sharing_histogram.clear(); - m_cache_to_cache = 0; - m_memory_to_cache = 0; - - // clear HashMaps - m_requestProfileMap_ptr->clear(); - - // count requests profiled - m_requests = 0; - - m_outstanding_requests.clear(); - m_outstanding_persistent_requests.clear(); - - // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared - //g_eventQueue_ptr->triggerAllEvents(); - - // update the start time - m_ruby_start = g_eventQueue_ptr->getTime(); -} - -void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) -{ - if (msg.getType() != CacheRequestType_IFETCH) { - - // Note: The following line should be commented out if you want to - // use the special profiling that is part of the GS320 protocol - - // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be profiled by the AddressProfiler - m_address_profiler_ptr->addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), msg.getType(), msg.getAccessMode(), id, false); - } } -void Profiler::profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner) +void +Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) { - Set set_contacted(owner); - if (type == AccessType_Write) { - set_contacted.addSet(sharers); - } - set_contacted.remove(requestor); - int number_contacted = set_contacted.count(); - - if (type == AccessType_Write) { - m_write_sharing_histogram.add(number_contacted); - } else { - m_read_sharing_histogram.add(number_contacted); - } - m_all_sharing_histogram.add(number_contacted); - - if (number_contacted == 0) { - m_memory_to_cache++; - } else { - m_cache_to_cache++; - } - -} - -void Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) { - assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); - m_delayedCyclesHistogram.add(delayCycles); - m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); - if (virtualNetwork != 0) { - m_delayedCyclesNonPFHistogram.add(delayCycles); - } + assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); + m_delayedCyclesHistogram.add(delayCycles); + m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); + if (virtualNetwork != 0) { + m_delayedCyclesNonPFHistogram.add(delayCycles); + } } // profiles original cache requests including PUTs -void Profiler::profileRequest(const string& requestStr) +void +Profiler::profileRequest(const string& requestStr) { - m_requests++; + m_requests++; - if (m_requestProfileMap_ptr->exist(requestStr)) { - (m_requestProfileMap_ptr->lookup(requestStr))++; - } else { - m_requestProfileMap_ptr->add(requestStr, 1); - } + if (m_requestProfileMap_ptr->exist(requestStr)) { + (m_requestProfileMap_ptr->lookup(requestStr))++; + } else { + m_requestProfileMap_ptr->add(requestStr, 1); + } } -void Profiler::controllerBusy(MachineID machID) +void +Profiler::controllerBusy(MachineID machID) { - m_busyControllerCount[(int)machID.type][(int)machID.num]++; + m_busyControllerCount[(int)machID.type][(int)machID.num]++; } -void Profiler::profilePFWait(Time waitTime) +void +Profiler::profilePFWait(Time waitTime) { - m_prefetchWaitHistogram.add(waitTime); + m_prefetchWaitHistogram.add(waitTime); } -void Profiler::bankBusy() +void +Profiler::bankBusy() { - m_busyBankCount++; + m_busyBankCount++; } // non-zero cycle demand request -void Profiler::missLatency(Time t, RubyRequestType type) +void +Profiler::missLatency(Time t, RubyRequestType type) { - m_allMissLatencyHistogram.add(t); - m_missLatencyHistograms[type].add(t); + m_allMissLatencyHistogram.add(t); + m_missLatencyHistograms[type].add(t); } // non-zero cycle prefetch request -void Profiler::swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach) +void +Profiler::swPrefetchLatency(Time t, CacheRequestType type, + GenericMachineType respondingMach) { - m_allSWPrefetchLatencyHistogram.add(t); - m_SWPrefetchLatencyHistograms[type].add(t); - m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); - if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) { - m_SWPrefetchL2MissLatencyHistogram.add(t); - } + m_allSWPrefetchLatencyHistogram.add(t); + m_SWPrefetchLatencyHistograms[type].add(t); + m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); + if (respondingMach == GenericMachineType_Directory || + respondingMach == GenericMachineType_NUM) { + m_SWPrefetchL2MissLatencyHistogram.add(t); + } } -void Profiler::profileTransition(const string& component, NodeID version, Address addr, - const string& state, const string& event, - const string& next_state, const string& note) +void +Profiler::profileTransition(const string& component, NodeID version, + Address addr, const string& state, const string& event, + const string& next_state, const string& note) { - const int EVENT_SPACES = 20; - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - const int COMP_SPACES = 10; - const int STATE_SPACES = 6; - - if ((g_debug_ptr->getDebugTime() > 0) && - (g_eventQueue_ptr->getTime() >= g_debug_ptr->getDebugTime())) { - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << version << " "; - (* debug_cout_ptr) << setw(COMP_SPACES) << component; - (* debug_cout_ptr) << setw(EVENT_SPACES) << event << " "; - - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(STATE_SPACES) << state; - (* debug_cout_ptr) << ">"; - (* debug_cout_ptr).flags(ios::left); - (* debug_cout_ptr) << setw(STATE_SPACES) << next_state; - - (* debug_cout_ptr) << " " << addr << " " << note; - - (* debug_cout_ptr) << endl; - } + const int EVENT_SPACES = 20; + const int ID_SPACES = 3; + const int TIME_SPACES = 7; + const int COMP_SPACES = 10; + const int STATE_SPACES = 6; + + if (g_debug_ptr->getDebugTime() <= 0 || + g_eventQueue_ptr->getTime() < g_debug_ptr->getDebugTime()) + return; + + ostream &out = *debug_cout_ptr; + out.flags(ios::right); + out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; + out << setw(ID_SPACES) << version << " "; + out << setw(COMP_SPACES) << component; + out << setw(EVENT_SPACES) << event << " "; + + out.flags(ios::right); + out << setw(STATE_SPACES) << state; + out << ">"; + out.flags(ios::left); + out << setw(STATE_SPACES) << next_state; + + out << " " << addr << " " << note; + + out << endl; } // Helper function -static double process_memory_total() +static double +process_memory_total() { - const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, - ifstream proc_file; - proc_file.open("/proc/self/statm"); - int total_size_in_pages = 0; - int res_size_in_pages = 0; - proc_file >> total_size_in_pages; - proc_file >> res_size_in_pages; - return double(total_size_in_pages)*MULTIPLIER; // size in megabytes + // 4kB page size, 1024*1024 bytes per MB, + const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0); + ifstream proc_file; + proc_file.open("/proc/self/statm"); + int total_size_in_pages = 0; + int res_size_in_pages = 0; + proc_file >> total_size_in_pages; + proc_file >> res_size_in_pages; + return double(total_size_in_pages) * MULTIPLIER; // size in megabytes } -static double process_memory_resident() +static double +process_memory_resident() { - const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, - ifstream proc_file; - proc_file.open("/proc/self/statm"); - int total_size_in_pages = 0; - int res_size_in_pages = 0; - proc_file >> total_size_in_pages; - proc_file >> res_size_in_pages; - return double(res_size_in_pages)*MULTIPLIER; // size in megabytes + // 4kB page size, 1024*1024 bytes per MB, + const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0); + ifstream proc_file; + proc_file.open("/proc/self/statm"); + int total_size_in_pages = 0; + int res_size_in_pages = 0; + proc_file >> total_size_in_pages; + proc_file >> res_size_in_pages; + return double(res_size_in_pages) * MULTIPLIER; // size in megabytes } -void Profiler::rubyWatch(int id){ +void +Profiler::rubyWatch(int id) +{ uint64 tr = 0; Address watch_address = Address(tr); const int ID_SPACES = 3; const int TIME_SPACES = 7; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << id << " " - << "RUBY WATCH " - << watch_address - << endl; + ostream &out = *debug_cout_ptr; + + out.flags(ios::right); + out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; + out << setw(ID_SPACES) << id << " " + << "RUBY WATCH " << watch_address << endl; - if(!m_watch_address_list_ptr->exist(watch_address)){ - m_watch_address_list_ptr->add(watch_address, 1); + if (!m_watch_address_list_ptr->exist(watch_address)) { + m_watch_address_list_ptr->add(watch_address, 1); } } -bool Profiler::watchAddress(Address addr){ +bool +Profiler::watchAddress(Address addr) +{ if (m_watch_address_list_ptr->exist(addr)) - return true; + return true; else - return false; + return false; } Profiler * diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index 3ae1f5e31..bf4bf8a50 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -42,35 +42,24 @@ ---------------------------------------------------------------------- */ -/* - * Profiler.hh - * - * Description: - * - * $Id$ - * - */ - -#ifndef PROFILER_H -#define PROFILER_H - -#include "mem/ruby/libruby.hh" +#ifndef __MEM_RUBY_PROFILER_PROFILER_HH__ +#define __MEM_RUBY_PROFILER_PROFILER_HH__ -#include "mem/ruby/common/Global.hh" -#include "mem/protocol/GenericMachineType.hh" -#include "mem/ruby/common/Histogram.hh" -#include "mem/ruby/common/Consumer.hh" #include "mem/protocol/AccessModeType.hh" #include "mem/protocol/AccessType.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/system/MachineID.hh" +#include "mem/protocol/CacheRequestType.hh" +#include "mem/protocol/GenericMachineType.hh" +#include "mem/protocol/GenericRequestType.hh" #include "mem/protocol/PrefetchBit.hh" #include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Consumer.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" #include "mem/ruby/common/Set.hh" -#include "mem/protocol/CacheRequestType.hh" -#include "mem/protocol/GenericRequestType.hh" +#include "mem/ruby/libruby.hh" +#include "mem/ruby/system/MachineID.hh" #include "mem/ruby/system/MemoryControl.hh" - +#include "mem/ruby/system/NodeID.hh" #include "params/RubyProfiler.hh" #include "sim/sim_object.hh" @@ -79,155 +68,165 @@ class AddressProfiler; template <class KEY_TYPE, class VALUE_TYPE> class Map; -class Profiler : public SimObject, public Consumer { -public: - // Constructors +class Profiler : public SimObject, public Consumer +{ + public: typedef RubyProfilerParams Params; - Profiler(const Params *); + Profiler(const Params *); + ~Profiler(); - // Destructor - ~Profiler(); + void wakeup(); - // Public Methods - void wakeup(); + void setPeriodicStatsFile(const string& filename); + void setPeriodicStatsInterval(integer_t period); - void setPeriodicStatsFile(const string& filename); - void setPeriodicStatsInterval(integer_t period); + void printStats(ostream& out, bool short_stats=false); + void printShortStats(ostream& out) { printStats(out, true); } + void printTraceStats(ostream& out) const; + void clearStats(); + void printConfig(ostream& out) const; + void printResourceUsage(ostream& out) const; - void printStats(ostream& out, bool short_stats=false); - void printShortStats(ostream& out) { printStats(out, true); } - void printTraceStats(ostream& out) const; - void clearStats(); - void printConfig(ostream& out) const; - void printResourceUsage(ostream& out) const; + AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } + AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } - AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } - AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } + void addAddressTraceSample(const CacheMsg& msg, NodeID id); - void addAddressTraceSample(const CacheMsg& msg, NodeID id); + void profileRequest(const string& requestStr); + void profileSharing(const Address& addr, AccessType type, + NodeID requestor, const Set& sharers, + const Set& owner); - void profileRequest(const string& requestStr); - void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner); + void profileMulticastRetry(const Address& addr, int count); - void profileMulticastRetry(const Address& addr, int count); + void profileFilterAction(int action); - void profileFilterAction(int action); + void profileConflictingRequests(const Address& addr); - void profileConflictingRequests(const Address& addr); - void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); } - void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); } - void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); } + void + profileOutstandingRequest(int outstanding) + { + m_outstanding_requests.add(outstanding); + } - void recordPrediction(bool wasGood, bool wasPredicted); + void + profileOutstandingPersistentRequest(int outstanding) + { + m_outstanding_persistent_requests.add(outstanding); + } - void startTransaction(int cpu); - void endTransaction(int cpu); - void profilePFWait(Time waitTime); + void + profileAverageLatencyEstimate(int latency) + { + m_average_latency_estimate.add(latency); + } - void controllerBusy(MachineID machID); - void bankBusy(); - void missLatency(Time t, RubyRequestType type); - void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); - void sequencerRequests(int num) { m_sequencer_requests.add(num); } + void recordPrediction(bool wasGood, bool wasPredicted); - void profileTransition(const string& component, NodeID version, Address addr, - const string& state, const string& event, - const string& next_state, const string& note); - void profileMsgDelay(int virtualNetwork, int delayCycles); + void startTransaction(int cpu); + void endTransaction(int cpu); + void profilePFWait(Time waitTime); - void print(ostream& out) const; + void controllerBusy(MachineID machID); + void bankBusy(); + void missLatency(Time t, RubyRequestType type); + void swPrefetchLatency(Time t, CacheRequestType type, + GenericMachineType respondingMach); + void sequencerRequests(int num) { m_sequencer_requests.add(num); } - void rubyWatch(int proc); - bool watchAddress(Address addr); + void profileTransition(const string& component, NodeID version, + Address addr, const string& state, + const string& event, const string& next_state, + const string& note); + void profileMsgDelay(int virtualNetwork, int delayCycles); - // return Ruby's start time - Time getRubyStartTime(){ - return m_ruby_start; - } + void print(ostream& out) const; - //added by SS - bool getHotLines() { return m_hot_lines; } - bool getAllInstructions() { return m_all_instructions; } + void rubyWatch(int proc); + bool watchAddress(Address addr); -private: + // return Ruby's start time + Time + getRubyStartTime() + { + return m_ruby_start; + } - // Private copy constructor and assignment operator - Profiler(const Profiler& obj); - Profiler& operator=(const Profiler& obj); + // added by SS + bool getHotLines() { return m_hot_lines; } + bool getAllInstructions() { return m_all_instructions; } - // Data Members (m_ prefix) - AddressProfiler* m_address_profiler_ptr; - AddressProfiler* m_inst_profiler_ptr; + private: + // Private copy constructor and assignment operator + Profiler(const Profiler& obj); + Profiler& operator=(const Profiler& obj); - Vector<int64> m_instructions_executed_at_start; - Vector<int64> m_cycles_executed_at_start; + AddressProfiler* m_address_profiler_ptr; + AddressProfiler* m_inst_profiler_ptr; - ostream* m_periodic_output_file_ptr; - integer_t m_stats_period; + Vector<int64> m_instructions_executed_at_start; + Vector<int64> m_cycles_executed_at_start; - Time m_ruby_start; - time_t m_real_time_start_time; + ostream* m_periodic_output_file_ptr; + integer_t m_stats_period; - Vector < Vector < integer_t > > m_busyControllerCount; - integer_t m_busyBankCount; - Histogram m_multicast_retry_histogram; + Time m_ruby_start; + time_t m_real_time_start_time; - Histogram m_filter_action_histogram; - Histogram m_tbeProfile; + Vector <Vector<integer_t> > m_busyControllerCount; + integer_t m_busyBankCount; + Histogram m_multicast_retry_histogram; - Histogram m_sequencer_requests; - Histogram m_read_sharing_histogram; - Histogram m_write_sharing_histogram; - Histogram m_all_sharing_histogram; - int64 m_cache_to_cache; - int64 m_memory_to_cache; + Histogram m_filter_action_histogram; + Histogram m_tbeProfile; - Histogram m_prefetchWaitHistogram; + Histogram m_sequencer_requests; + Histogram m_read_sharing_histogram; + Histogram m_write_sharing_histogram; + Histogram m_all_sharing_histogram; + int64 m_cache_to_cache; + int64 m_memory_to_cache; - Vector<Histogram> m_missLatencyHistograms; - Vector<Histogram> m_machLatencyHistograms; - Histogram m_allMissLatencyHistogram; + Histogram m_prefetchWaitHistogram; - Histogram m_allSWPrefetchLatencyHistogram; - Histogram m_SWPrefetchL2MissLatencyHistogram; - Vector<Histogram> m_SWPrefetchLatencyHistograms; - Vector<Histogram> m_SWPrefetchMachLatencyHistograms; + Vector<Histogram> m_missLatencyHistograms; + Vector<Histogram> m_machLatencyHistograms; + Histogram m_allMissLatencyHistogram; - Histogram m_delayedCyclesHistogram; - Histogram m_delayedCyclesNonPFHistogram; - Vector<Histogram> m_delayedCyclesVCHistograms; + Histogram m_allSWPrefetchLatencyHistogram; + Histogram m_SWPrefetchL2MissLatencyHistogram; + Vector<Histogram> m_SWPrefetchLatencyHistograms; + Vector<Histogram> m_SWPrefetchMachLatencyHistograms; - Histogram m_outstanding_requests; - Histogram m_outstanding_persistent_requests; + Histogram m_delayedCyclesHistogram; + Histogram m_delayedCyclesNonPFHistogram; + Vector<Histogram> m_delayedCyclesVCHistograms; + + Histogram m_outstanding_requests; + Histogram m_outstanding_persistent_requests; + + Histogram m_average_latency_estimate; - Histogram m_average_latency_estimate; + Map<Address, int>* m_watch_address_list_ptr; + // counts all initiated cache request including PUTs + int m_requests; + Map <string, int>* m_requestProfileMap_ptr; - Map<Address, int>* m_watch_address_list_ptr; - // counts all initiated cache request including PUTs - int m_requests; - Map <string, int>* m_requestProfileMap_ptr; + //added by SS + bool m_hot_lines; + bool m_all_instructions; - //added by SS - bool m_hot_lines; - bool m_all_instructions; - - int m_num_of_sequencers; + int m_num_of_sequencers; }; -// Output operator declaration -ostream& operator<<(ostream& out, const Profiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const Profiler& obj) +inline ostream& +operator<<(ostream& out, const Profiler& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + out << flush; + return out; } -#endif //PROFILER_H +#endif // __MEM_RUBY_PROFILER_PROFILER_HH__ diff --git a/src/mem/ruby/profiler/StoreTrace.cc b/src/mem/ruby/profiler/StoreTrace.cc index 4d4e4798d..ce42560b6 100644 --- a/src/mem/ruby/profiler/StoreTrace.cc +++ b/src/mem/ruby/profiler/StoreTrace.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,132 +26,130 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * $Id$ - * - */ - -#include "mem/ruby/profiler/StoreTrace.hh" #include "mem/ruby/eventqueue/RubyEventQueue.hh" +#include "mem/ruby/profiler/StoreTrace.hh" -bool StoreTrace::s_init = false; // Total number of store lifetimes of all lines -int64 StoreTrace::s_total_samples = 0; // Total number of store lifetimes of all lines +bool StoreTrace::s_init = false; // Total number of store lifetimes of + // all lines +int64 StoreTrace::s_total_samples = 0; // Total number of store + // lifetimes of all lines Histogram* StoreTrace::s_store_count_ptr = NULL; Histogram* StoreTrace::s_store_first_to_stolen_ptr = NULL; Histogram* StoreTrace::s_store_last_to_stolen_ptr = NULL; Histogram* StoreTrace::s_store_first_to_last_ptr = NULL; -StoreTrace::StoreTrace(const Address& addr) : - m_store_count(-1), m_store_first_to_stolen(-1), m_store_last_to_stolen(-1), m_store_first_to_last(-1) +StoreTrace::StoreTrace(const Address& addr) + : m_store_count(-1), m_store_first_to_stolen(-1), + m_store_last_to_stolen(-1), m_store_first_to_last(-1) { - StoreTrace::initSummary(); - m_addr = addr; - m_total_samples = 0; - m_last_writer = -1; // Really -1 isn't valid, so this will trigger the initilization code - m_stores_this_interval = 0; + StoreTrace::initSummary(); + m_addr = addr; + m_total_samples = 0; + + // Really -1 isn't valid, so this will trigger the initilization code + m_last_writer = -1; + m_stores_this_interval = 0; } StoreTrace::~StoreTrace() { } -void StoreTrace::print(ostream& out) const +void +StoreTrace::print(ostream& out) const { - out << m_addr; - out << " total_samples: " << m_total_samples << endl; - out << "store_count: " << m_store_count << endl; - out << "store_first_to_stolen: " << m_store_first_to_stolen << endl; - out << "store_last_to_stolen: " << m_store_last_to_stolen << endl; - out << "store_first_to_last: " << m_store_first_to_last << endl; + out << m_addr + << " total_samples: " << m_total_samples << endl + << "store_count: " << m_store_count << endl + << "store_first_to_stolen: " << m_store_first_to_stolen << endl + << "store_last_to_stolen: " << m_store_last_to_stolen << endl + << "store_first_to_last: " << m_store_first_to_last << endl; } -// Class method -void StoreTrace::initSummary() +void +StoreTrace::initSummary() { - if (!s_init) { - s_total_samples = 0; - s_store_count_ptr = new Histogram(-1); - s_store_first_to_stolen_ptr = new Histogram(-1); - s_store_last_to_stolen_ptr = new Histogram(-1); - s_store_first_to_last_ptr = new Histogram(-1); - } - s_init = true; + if (!s_init) { + s_total_samples = 0; + s_store_count_ptr = new Histogram(-1); + s_store_first_to_stolen_ptr = new Histogram(-1); + s_store_last_to_stolen_ptr = new Histogram(-1); + s_store_first_to_last_ptr = new Histogram(-1); + } + s_init = true; } -// Class method -void StoreTrace::printSummary(ostream& out) +void +StoreTrace::printSummary(ostream& out) { - out << "total_samples: " << s_total_samples << endl; - out << "store_count: " << (*s_store_count_ptr) << endl; - out << "store_first_to_stolen: " << (*s_store_first_to_stolen_ptr) << endl; - out << "store_last_to_stolen: " << (*s_store_last_to_stolen_ptr) << endl; - out << "store_first_to_last: " << (*s_store_first_to_last_ptr) << endl; + out << "total_samples: " << s_total_samples << endl; + out << "store_count: " << (*s_store_count_ptr) << endl; + out << "store_first_to_stolen: " << (*s_store_first_to_stolen_ptr) << endl; + out << "store_last_to_stolen: " << (*s_store_last_to_stolen_ptr) << endl; + out << "store_first_to_last: " << (*s_store_first_to_last_ptr) << endl; } -// Class method -void StoreTrace::clearSummary() +void +StoreTrace::clearSummary() { - StoreTrace::initSummary(); - s_total_samples = 0; - s_store_count_ptr->clear(); - s_store_first_to_stolen_ptr->clear(); - s_store_last_to_stolen_ptr->clear(); - s_store_first_to_last_ptr->clear(); + StoreTrace::initSummary(); + s_total_samples = 0; + s_store_count_ptr->clear(); + s_store_first_to_stolen_ptr->clear(); + s_store_last_to_stolen_ptr->clear(); + s_store_first_to_last_ptr->clear(); } -void StoreTrace::store(NodeID node) +void +StoreTrace::store(NodeID node) { - Time current = g_eventQueue_ptr->getTime(); - - assert((m_last_writer == -1) || (m_last_writer == node)); + Time current = g_eventQueue_ptr->getTime(); - m_last_writer = node; - if (m_last_writer == -1) { - assert(m_stores_this_interval == 0); - } + assert((m_last_writer == -1) || (m_last_writer == node)); - if (m_stores_this_interval == 0) { - // A new proessor just wrote the line, so reset the stats - m_first_store = current; - } + m_last_writer = node; + if (m_last_writer == -1) { + assert(m_stores_this_interval == 0); + } - m_last_store = current; - m_stores_this_interval++; -} + if (m_stores_this_interval == 0) { + // A new proessor just wrote the line, so reset the stats + m_first_store = current; + } -void StoreTrace::downgrade(NodeID node) -{ - if (node == m_last_writer) { - Time current = g_eventQueue_ptr->getTime(); - assert(m_stores_this_interval != 0); - assert(m_last_store != 0); - assert(m_first_store != 0); - assert(m_last_writer != -1); - - // Per line stats - m_store_first_to_stolen.add(current - m_first_store); - m_store_count.add(m_stores_this_interval); - m_store_last_to_stolen.add(current - m_last_store); - m_store_first_to_last.add(m_last_store - m_first_store); - m_total_samples++; - - // Global stats - assert(s_store_first_to_stolen_ptr != NULL); - s_store_first_to_stolen_ptr->add(current - m_first_store); - s_store_count_ptr->add(m_stores_this_interval); - s_store_last_to_stolen_ptr->add(current - m_last_store); - s_store_first_to_last_ptr->add(m_last_store - m_first_store); - s_total_samples++; - - // Initilize for next go round - m_stores_this_interval = 0; - m_last_store = 0; - m_first_store = 0; - m_last_writer = -1; - } + m_last_store = current; + m_stores_this_interval++; } -bool node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2) +void +StoreTrace::downgrade(NodeID node) { - return (n1->getTotal() > n2->getTotal()); + if (node == m_last_writer) { + Time current = g_eventQueue_ptr->getTime(); + assert(m_stores_this_interval != 0); + assert(m_last_store != 0); + assert(m_first_store != 0); + assert(m_last_writer != -1); + + // Per line stats + m_store_first_to_stolen.add(current - m_first_store); + m_store_count.add(m_stores_this_interval); + m_store_last_to_stolen.add(current - m_last_store); + m_store_first_to_last.add(m_last_store - m_first_store); + m_total_samples++; + + // Global stats + assert(s_store_first_to_stolen_ptr != NULL); + s_store_first_to_stolen_ptr->add(current - m_first_store); + s_store_count_ptr->add(m_stores_this_interval); + s_store_last_to_stolen_ptr->add(current - m_last_store); + s_store_first_to_last_ptr->add(m_last_store - m_first_store); + s_total_samples++; + + // Initilize for next go round + m_stores_this_interval = 0; + m_last_store = 0; + m_first_store = 0; + m_last_writer = -1; + } } diff --git a/src/mem/ruby/profiler/StoreTrace.hh b/src/mem/ruby/profiler/StoreTrace.hh index 5cdf7ce41..8bddfe6c7 100644 --- a/src/mem/ruby/profiler/StoreTrace.hh +++ b/src/mem/ruby/profiler/StoreTrace.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,82 +26,63 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * $Id$ - * - * Description: - * - */ - -#ifndef StoreTrace_H -#define StoreTrace_H +#ifndef __MEM_RUBY_PROFILER_STORETRACE_HH__ +#define __MEM_RUBY_PROFILER_STORETRACE_HH__ -#include "mem/ruby/common/Global.hh" #include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Global.hh" #include "mem/ruby/common/Histogram.hh" -class StoreTrace { -public: - // Constructors - StoreTrace() { } - explicit StoreTrace(const Address& addr); - - // Destructor - ~StoreTrace(); - - // Public Methods - void store(NodeID node); - void downgrade(NodeID node); - int getTotal() const { return m_total_samples; } - static void initSummary(); - static void printSummary(ostream& out); - static void clearSummary(); - - void print(ostream& out) const; -private: - // Private Methods - - // Private copy constructor and assignment operator - // StoreTrace(const StoreTrace& obj); - // StoreTrace& operator=(const StoreTrace& obj); - - // Class Members (s_ prefix) - static bool s_init; - static int64 s_total_samples; // Total number of store lifetimes of all lines - static Histogram* s_store_count_ptr; - static Histogram* s_store_first_to_stolen_ptr; - static Histogram* s_store_last_to_stolen_ptr; - static Histogram* s_store_first_to_last_ptr; - - // Data Members (m_ prefix) - - Address m_addr; - NodeID m_last_writer; - Time m_first_store; - Time m_last_store; - int m_stores_this_interval; - - int64 m_total_samples; // Total number of store lifetimes of this line - Histogram m_store_count; - Histogram m_store_first_to_stolen; - Histogram m_store_last_to_stolen; - Histogram m_store_first_to_last; +class StoreTrace +{ + public: + StoreTrace() { } + explicit StoreTrace(const Address& addr); + ~StoreTrace(); + + void store(NodeID node); + void downgrade(NodeID node); + int getTotal() const { return m_total_samples; } + static void initSummary(); + static void printSummary(ostream& out); + static void clearSummary(); + + void print(ostream& out) const; + + private: + static bool s_init; + static int64 s_total_samples; // Total number of store lifetimes + // of all lines + static Histogram* s_store_count_ptr; + static Histogram* s_store_first_to_stolen_ptr; + static Histogram* s_store_last_to_stolen_ptr; + static Histogram* s_store_first_to_last_ptr; + + Address m_addr; + NodeID m_last_writer; + Time m_first_store; + Time m_last_store; + int m_stores_this_interval; + + int64 m_total_samples; // Total number of store lifetimes of this line + Histogram m_store_count; + Histogram m_store_first_to_stolen; + Histogram m_store_last_to_stolen; + Histogram m_store_first_to_last; }; -bool node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2); - -// Output operator declaration -ostream& operator<<(ostream& out, const StoreTrace& obj); - -// ******************* Definitions ******************* +inline bool +node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2) +{ + return n1->getTotal() > n2->getTotal(); +} -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const StoreTrace& obj) +inline ostream& +operator<<(ostream& out, const StoreTrace& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + out << flush; + return out; } -#endif //StoreTrace_H +#endif // __MEM_RUBY_PROFILER_STORETRACE_HH__ |