Diffstat (limited to 'src/mem/ruby/system')
 src/mem/ruby/system/CacheMemory.cc        |  49
 src/mem/ruby/system/CacheMemory.hh        |  16
 src/mem/ruby/system/DMASequencer.hh       |   3
 src/mem/ruby/system/DirectoryMemory.cc    |   1
 src/mem/ruby/system/MemoryVector.hh       |  86
 src/mem/ruby/system/PerfectCacheMemory.hh |  14
 src/mem/ruby/system/RubyPort.cc           |  86
 src/mem/ruby/system/RubyPort.hh           |  15
 src/mem/ruby/system/Sequencer.cc          |  19
 src/mem/ruby/system/Sequencer.hh          |  14
 src/mem/ruby/system/SparseMemory.cc       |  98
 src/mem/ruby/system/SparseMemory.hh       |  27
 src/mem/ruby/system/System.cc             | 244
 src/mem/ruby/system/System.hh             |  52
14 files changed, 597 insertions(+), 127 deletions(-)
diff --git a/src/mem/ruby/system/CacheMemory.cc b/src/mem/ruby/system/CacheMemory.cc
index 1564128d3..9f1fe6320 100644
--- a/src/mem/ruby/system/CacheMemory.cc
+++ b/src/mem/ruby/system/CacheMemory.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -28,7 +28,9 @@
 
 #include "base/intmath.hh"
 #include "debug/RubyCache.hh"
+#include "mem/protocol/AccessPermission.hh"
 #include "mem/ruby/system/CacheMemory.hh"
+#include "mem/ruby/system/System.hh"
 
 using namespace std;
 
@@ -364,31 +366,42 @@ CacheMemory::profileGenericRequest(GenericRequestType requestType,
 }
 
 void
-CacheMemory::recordCacheContents(CacheRecorder& tr) const
+CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const
 {
+    uint64 warmedUpBlocks = 0;
+    uint64 totalBlocks M5_VAR_USED = (uint64)m_cache_num_sets
+                                     * (uint64)m_cache_assoc;
+
     for (int i = 0; i < m_cache_num_sets; i++) {
         for (int j = 0; j < m_cache_assoc; j++) {
-            AccessPermission perm = m_cache[i][j]->m_Permission;
-            RubyRequestType request_type = RubyRequestType_NULL;
-            if (perm == AccessPermission_Read_Only) {
-                if (m_is_instruction_only_cache) {
-                    request_type = RubyRequestType_IFETCH;
-                } else {
-                    request_type = RubyRequestType_LD;
+            if (m_cache[i][j] != NULL) {
+                AccessPermission perm = m_cache[i][j]->m_Permission;
+                RubyRequestType request_type = RubyRequestType_NULL;
+                if (perm == AccessPermission_Read_Only) {
+                    if (m_is_instruction_only_cache) {
+                        request_type = RubyRequestType_IFETCH;
+                    } else {
+                        request_type = RubyRequestType_LD;
+                    }
+                } else if (perm == AccessPermission_Read_Write) {
+                    request_type = RubyRequestType_ST;
                 }
-            } else if (perm == AccessPermission_Read_Write) {
-                request_type = RubyRequestType_ST;
-            }
 
-            if (request_type != RubyRequestType_NULL) {
-#if 0
-                tr.addRecord(m_chip_ptr->getID(), m_cache[i][j].m_Address,
-                             Address(0), request_type,
-                             m_replacementPolicy_ptr->getLastAccess(i, j));
-#endif
+                if (request_type != RubyRequestType_NULL) {
+                    tr->addRecord(cntrl, m_cache[i][j]->m_Address.getAddress(),
+                                  0, request_type,
+                                  m_replacementPolicy_ptr->getLastAccess(i, j),
+                                  m_cache[i][j]->getDataBlk());
+                    warmedUpBlocks++;
+                }
             }
         }
     }
+
+    DPRINTF(RubyCache, "%s: %lli blocks of %lli total blocks"
+            "recorded %.2f%% \n", name().c_str(), warmedUpBlocks,
+            (uint64)m_cache_num_sets * (uint64)m_cache_assoc,
+            (float(warmedUpBlocks)/float(totalBlocks))*100.0);
 }
 
 void
diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh
index f0acba9cb..f270e88cd 100644
--- a/src/mem/ruby/system/CacheMemory.hh
+++ b/src/mem/ruby/system/CacheMemory.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -34,21 +34,15 @@
 #include <vector>
 
 #include "base/hashmap.hh"
-#include "mem/protocol/AccessPermission.hh"
 #include "mem/protocol/GenericRequestType.hh"
 #include "mem/protocol/RubyRequest.hh"
-#include "mem/protocol/RubyRequestType.hh"
-#include "mem/ruby/common/Address.hh"
 #include "mem/ruby/common/DataBlock.hh"
-#include "mem/ruby/common/Global.hh"
 #include "mem/ruby/profiler/CacheProfiler.hh"
 #include "mem/ruby/recorder/CacheRecorder.hh"
 #include "mem/ruby/slicc_interface/AbstractCacheEntry.hh"
-#include "mem/ruby/slicc_interface/AbstractController.hh"
 #include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
 #include "mem/ruby/system/LRUPolicy.hh"
 #include "mem/ruby/system/PseudoLRUPolicy.hh"
-#include "mem/ruby/system/System.hh"
 #include "params/RubyCache.hh"
 #include "sim/sim_object.hh"
 
@@ -100,12 +94,7 @@ class CacheMemory : public SimObject
     int getLatency() const { return m_latency; }
 
     // Hook for checkpointing the contents of the cache
-    void recordCacheContents(CacheRecorder& tr) const;
-    void
-    setAsInstructionCache(bool is_icache)
-    {
-        m_is_instruction_only_cache = is_icache;
-    }
+    void recordCacheContents(int cntrl, CacheRecorder* tr) const;
 
     // Set this address to most recently used
     void setMRU(const Address& address);
@@ -146,7 +135,6 @@ class CacheMemory : public SimObject
 
     // Data Members (m_prefix)
     bool m_is_instruction_only_cache;
-    bool m_is_data_only_cache;
 
     // The first index is the # of cache lines.
     // The second index is the the amount associativity.
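The reworked recordCacheContents() above walks every set and way, maps each valid block's permission to the request type that will re-fetch it when the checkpoint is restored, and hands one record per block to the CacheRecorder. A minimal standalone sketch of that mapping loop, using illustrative stand-in types rather than gem5's AbstractCacheEntry and CacheRecorder:

```cpp
// Sketch only: Block, Record and the enums are assumptions for
// illustration, not gem5's actual classes.
#include <cstdint>
#include <vector>

enum class Perm { Invalid, ReadOnly, ReadWrite };
enum class ReqType { None, Ifetch, Load, Store };

struct Block  { bool valid; Perm perm; uint64_t addr; };
struct Record { int cntrl; uint64_t addr; ReqType type; };

// Mirror of the patch's nested set/way loop: each valid block's
// permission decides how the warm-up phase will replay it.
std::vector<Record> recordContents(int cntrl, bool icache,
                                   const std::vector<Block>& blocks)
{
    std::vector<Record> trace;
    for (const Block& b : blocks) {
        if (!b.valid)
            continue;                        // the NULL-entry check
        ReqType t = ReqType::None;
        if (b.perm == Perm::ReadOnly)
            t = icache ? ReqType::Ifetch : ReqType::Load;
        else if (b.perm == Perm::ReadWrite)
            t = ReqType::Store;              // writable block: replay as store
        if (t != ReqType::None)
            trace.push_back({cntrl, b.addr, t});
    }
    return trace;
}
```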
diff --git a/src/mem/ruby/system/DMASequencer.hh b/src/mem/ruby/system/DMASequencer.hh
index 5f6b9f100..099c1d991 100644
--- a/src/mem/ruby/system/DMASequencer.hh
+++ b/src/mem/ruby/system/DMASequencer.hh
@@ -55,6 +55,9 @@ class DMASequencer : public RubyPort
     /* external interface */
     RequestStatus makeRequest(PacketPtr pkt);
     bool busy() { return m_is_busy;}
+    int outstandingCount() const { return (m_is_busy ? 1 : 0); }
+    bool isDeadlockEventScheduled() const { return false; }
+    void descheduleDeadlockEvent() {}
 
     /* SLICC callback */
     void dataCallback(const DataBlock & dblk);
diff --git a/src/mem/ruby/system/DirectoryMemory.cc b/src/mem/ruby/system/DirectoryMemory.cc
index 03aa68919..d2e00ab3b 100644
--- a/src/mem/ruby/system/DirectoryMemory.cc
+++ b/src/mem/ruby/system/DirectoryMemory.cc
@@ -58,6 +58,7 @@ DirectoryMemory::init()
 
     if (m_use_map) {
         m_sparseMemory = new SparseMemory(m_map_levels);
+        g_system_ptr->registerSparseMemory(m_sparseMemory);
     } else {
         m_entries = new AbstractEntry*[m_num_entries];
         for (int i = 0; i < m_num_entries; i++)
diff --git a/src/mem/ruby/system/MemoryVector.hh b/src/mem/ruby/system/MemoryVector.hh
index 6719b9fb6..9bd3516c2 100644
--- a/src/mem/ruby/system/MemoryVector.hh
+++ b/src/mem/ruby/system/MemoryVector.hh
@@ -29,6 +29,7 @@
 #ifndef __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
 #define __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
 
+#include "base/trace.hh"
 #include "mem/ruby/common/Address.hh"
 
 class DirectoryMemory;
@@ -48,6 +49,8 @@ class MemoryVector
 
     void write(const Address & paddr, uint8* data, int len);
     uint8* read(const Address & paddr, uint8* data, int len);
+    uint32 collatePages(uint8* &raw_data);
+    void populatePages(uint8* raw_data);
 
   private:
     uint8* getBlockPtr(const PhysAddress & addr);
@@ -56,6 +59,7 @@ class MemoryVector
     uint8** m_pages;
     uint32 m_num_pages;
     const uint32 m_page_offset_mask;
+    static const uint32 PAGE_SIZE = 4096;
 };
 
 inline
@@ -97,7 +101,7 @@ MemoryVector::resize(uint32 size)
         delete [] m_pages;
     }
     m_size = size;
-    assert(size%4096 == 0);
+    assert(size%PAGE_SIZE == 0);
     m_num_pages = size >> 12;
     m_pages = new uint8*[m_num_pages];
     memset(m_pages, 0, m_num_pages * sizeof(uint8*));
@@ -118,8 +122,8 @@ MemoryVector::write(const Address & paddr, uint8* data, int len)
         }
         if (all_zeros)
             return;
-        m_pages[page_num] = new uint8[4096];
-        memset(m_pages[page_num], 0, 4096);
+        m_pages[page_num] = new uint8[PAGE_SIZE];
+        memset(m_pages[page_num], 0, PAGE_SIZE);
         uint32 offset = paddr.getAddress() & m_page_offset_mask;
         memcpy(&m_pages[page_num][offset], data, len);
     } else {
@@ -147,10 +151,82 @@ MemoryVector::getBlockPtr(const PhysAddress & paddr)
 {
     uint32 page_num = paddr.getAddress() >> 12;
     if (m_pages[page_num] == 0) {
-        m_pages[page_num] = new uint8[4096];
-        memset(m_pages[page_num], 0, 4096);
+        m_pages[page_num] = new uint8[PAGE_SIZE];
+        memset(m_pages[page_num], 0, PAGE_SIZE);
     }
     return &m_pages[page_num][paddr.getAddress()&m_page_offset_mask];
 }
 
+/*!
+ * Function for collating all the pages of the physical memory together.
+ * In case a pointer for a page is NULL, this page needs only a single byte
+ * to represent that the pointer is NULL. Otherwise, it needs 1 + PAGE_SIZE
+ * bytes. The first represents that the page pointer is not NULL, and rest of
+ * the bytes represent the data on the page.
+ */
+
+inline uint32
+MemoryVector::collatePages(uint8* &raw_data)
+{
+    uint32 num_zero_pages = 0;
+    uint32 data_size = 0;
+
+    for (uint32 i = 0;i < m_num_pages; ++i)
+    {
+        if (m_pages[i] == 0) num_zero_pages++;
+    }
+
+    raw_data = new uint8[ sizeof(uint32) /* number of pages*/
+                          + m_num_pages /* whether the page is all zeros */
+                          + PAGE_SIZE * (m_num_pages - num_zero_pages)];
+
+    /* Write the number of pages to be stored. */
+    memcpy(raw_data, &m_num_pages, sizeof(uint32));
+    data_size = sizeof(uint32);
+
+    for (uint32 i = 0;i < m_num_pages; ++i)
+    {
+        if (m_pages[i] == 0) {
+            raw_data[data_size] = 0;
+        } else {
+            raw_data[data_size] = 1;
+            memcpy(raw_data + data_size + 1, m_pages[i], PAGE_SIZE);
+            data_size += PAGE_SIZE;
+        }
+        data_size += 1;
+    }
+
+    return data_size;
+}
+
+/*!
+ * Function for populating the pages of the memory using the available raw
+ * data. Each page has a byte associate with it, which represents whether the
+ * page was NULL or not, when all the pages were collated. The function assumes
+ * that the number of pages in the memory are same as those that were recorded
+ * in the checkpoint.
+ */
+inline void
+MemoryVector::populatePages(uint8* raw_data)
+{
+    uint32 data_size = 0;
+    uint32 num_pages = 0;
+
+    /* Read the number of pages that were stored. */
+    memcpy(&num_pages, raw_data, sizeof(uint32));
+    data_size = sizeof(uint32);
+    assert(num_pages == m_num_pages);
+
+    for (uint32 i = 0;i < m_num_pages; ++i)
+    {
+        assert(m_pages[i] == 0);
+        if (raw_data[data_size] != 0) {
+            m_pages[i] = new uint8[PAGE_SIZE];
+            memcpy(m_pages[i], raw_data + data_size + 1, PAGE_SIZE);
+            data_size += PAGE_SIZE;
+        }
+        data_size += 1;
+    }
+}
+
 #endif // __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
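collatePages()/populatePages() define a simple self-describing byte format: a uint32 page count, then one flag byte per page, with PAGE_SIZE data bytes following each flag byte that is 1 (an untouched page costs only a single 0 byte). A standalone sketch of the same encoding over std::vector pages; the names here are illustrative, not gem5's:

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

static const uint32_t kPageSize = 4096;

// Serialize: [uint32 count][per page: flag byte, then kPageSize bytes if 1].
std::vector<uint8_t> collate(const std::vector<std::vector<uint8_t>>& pages)
{
    std::vector<uint8_t> raw;
    uint32_t n = pages.size();
    raw.insert(raw.end(), reinterpret_cast<uint8_t*>(&n),
               reinterpret_cast<uint8_t*>(&n) + sizeof(n));
    for (const auto& p : pages) {
        if (p.empty()) {
            raw.push_back(0);                 // all-zero page: one flag byte
        } else {
            raw.push_back(1);                 // allocated page: flag + data
            raw.insert(raw.end(), p.begin(), p.end());
        }
    }
    return raw;
}

// Deserialize: inverse walk, advancing past data only where the flag was 1.
std::vector<std::vector<uint8_t>> populate(const std::vector<uint8_t>& raw)
{
    uint32_t n;
    std::memcpy(&n, raw.data(), sizeof(n));
    std::vector<std::vector<uint8_t>> pages(n);
    size_t pos = sizeof(n);
    for (uint32_t i = 0; i < n; ++i) {
        if (raw[pos++] != 0) {
            pages[i].assign(raw.begin() + pos, raw.begin() + pos + kPageSize);
            pos += kPageSize;
        }
    }
    return pages;
}
```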
diff --git a/src/mem/ruby/system/PerfectCacheMemory.hh b/src/mem/ruby/system/PerfectCacheMemory.hh
index 772b3d1f9..b880b6434 100644
--- a/src/mem/ruby/system/PerfectCacheMemory.hh
+++ b/src/mem/ruby/system/PerfectCacheMemory.hh
@@ -32,7 +32,6 @@
 #include "base/hashmap.hh"
 #include "mem/protocol/AccessPermission.hh"
 #include "mem/ruby/common/Address.hh"
-#include "mem/ruby/common/Global.hh"
 
 template<class ENTRY>
 struct PerfectCacheLineState
@@ -57,10 +56,6 @@ class PerfectCacheMemory
 
     static void printConfig(std::ostream& out);
 
-    // perform a cache access and see if we hit or not.  Return true
-    // on a hit.
-    bool tryCacheAccess(const CacheMsg& msg, bool& block_stc, ENTRY*& entry);
-
     // tests to see if an address is present in the cache
     bool isTagPresent(const Address& address) const;
 
@@ -118,15 +113,6 @@ PerfectCacheMemory<ENTRY>::printConfig(std::ostream& out)
 {
 }
 
-template<class ENTRY>
-inline bool
-PerfectCacheMemory<ENTRY>::tryCacheAccess(const CacheMsg& msg,
-                                          bool& block_stc, ENTRY*& entry)
-{
-    panic("not implemented");
-    return true;
-}
-
 // tests to see if an address is present in the cache
 template<class ENTRY>
 inline bool
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index f7bde739e..64faf6aed 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -27,11 +27,11 @@
  */
 
 #include "cpu/testers/rubytest/RubyTester.hh"
+#include "debug/Config.hh"
 #include "debug/Ruby.hh"
 #include "mem/protocol/AccessPermission.hh"
 #include "mem/ruby/slicc_interface/AbstractController.hh"
 #include "mem/ruby/system/RubyPort.hh"
-#include "mem/physical.hh"
 
 RubyPort::RubyPort(const Params *p)
     : MemObject(p)
@@ -51,6 +51,8 @@ RubyPort::RubyPort(const Params *p)
     m_usingRubyTester = p->using_ruby_tester;
     access_phys_mem = p->access_phys_mem;
 
+    drainEvent = NULL;
+
     ruby_system = p->ruby_system;
     waitingOnSequencer = false;
 }
@@ -66,8 +68,10 @@ Port *
 RubyPort::getPort(const std::string &if_name, int idx)
 {
     if (if_name == "port") {
-        return new M5Port(csprintf("%s-port%d", name(), idx), this,
-                          ruby_system, access_phys_mem);
+        M5Port* cpuPort = new M5Port(csprintf("%s-port%d", name(), idx),
+                                     this, ruby_system, access_phys_mem);
+        cpu_ports.push_back(cpuPort);
+        return cpuPort;
     }
 
     if (if_name == "pio_port") {
@@ -508,6 +512,82 @@ RubyPort::ruby_hit_callback(PacketPtr pkt)
             (*i)->sendRetry();
         }
     }
+
+    testDrainComplete();
+}
+
+void
+RubyPort::testDrainComplete()
+{
+    //If we weren't able to drain before, we might be able to now.
+    if (drainEvent != NULL) {
+        unsigned int drainCount = getDrainCount(drainEvent);
+        DPRINTF(Config, "Drain count: %u\n", drainCount);
+        if (drainCount == 0) {
+            drainEvent->process();
+            // Clear the drain event once we're done with it.
+            drainEvent = NULL;
+        }
+    }
+}
+
+unsigned int
+RubyPort::getDrainCount(Event *de)
+{
+    int count = 0;
+    //
+    // If the sequencer is not empty, then requests need to drain.
+    // The outstandingCount is the number of requests outstanding and thus the
+    // number of times M5's timing port will process the drain event.
+    //
+    count += outstandingCount();
+
+    DPRINTF(Config, "outstanding count %d\n", outstandingCount());
+
+    // To simplify the draining process, the sequencer's deadlock detection
+    // event should have been descheduled.
+    assert(isDeadlockEventScheduled() == false);
+
+    if (pio_port != NULL) {
+        count += pio_port->drain(de);
+        DPRINTF(Config, "count after pio check %d\n", count);
+    }
+    if (physMemPort != NULL) {
+        count += physMemPort->drain(de);
+        DPRINTF(Config, "count after physmem check %d\n", count);
+    }
+
+    for (CpuPortIter p_iter = cpu_ports.begin(); p_iter != cpu_ports.end();
+         p_iter++) {
+        M5Port* cpu_port = *p_iter;
+        count += cpu_port->drain(de);
+        DPRINTF(Config, "count after cpu port check %d\n", count);
+    }
+
+    DPRINTF(Config, "final count %d\n", count);
+
+    return count;
+}
+
+unsigned int
+RubyPort::drain(Event *de)
+{
+    if (isDeadlockEventScheduled()) {
+        descheduleDeadlockEvent();
+    }
+
+    int count = getDrainCount(de);
+
+    // Set status
+    if (count != 0) {
+        drainEvent = de;
+
+        changeState(SimObject::Draining);
+        return count;
+    }
+
+    changeState(SimObject::Drained);
+    return 0;
 }
 
 void
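The drain protocol added to RubyPort works in two halves: drain() sums outstanding work across the sequencer and all child ports, and if the count is nonzero it parks the drain event and enters Draining; every later completion then re-tests via testDrainComplete() until the count reaches zero. A toy sketch of that handshake, with a std::function standing in for gem5's Event machinery:

```cpp
// Assumption-laden sketch: Drainee is not gem5's MemObject, and the
// callback replaces the Event* that the real code stores in drainEvent.
#include <cstdio>
#include <functional>

class Drainee {
  public:
    explicit Drainee(int outstanding) : m_outstanding(outstanding) {}

    // Returns how many completions remain; nonzero means the callback
    // fires later, like returning count and entering Draining.
    unsigned drain(std::function<void()> onDrained)
    {
        if (m_outstanding != 0) {
            m_onDrained = onDrained;       // park it, like drainEvent
            return m_outstanding;
        }
        return 0;                          // already drained
    }

    // Called on each request completion, like ruby_hit_callback().
    void complete()
    {
        if (m_outstanding > 0 && --m_outstanding == 0 && m_onDrained) {
            m_onDrained();                 // testDrainComplete() firing
            m_onDrained = nullptr;
        }
    }

  private:
    int m_outstanding;
    std::function<void()> m_onDrained;
};

int main()
{
    Drainee port(2);
    std::printf("pending: %u\n", port.drain([] { std::puts("drained"); }));
    port.complete();
    port.complete();                       // prints "drained"
}
```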
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index 88e865766..d8dbe0cda 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -33,7 +33,6 @@
 #include <string>
 
 #include "mem/protocol/RequestStatus.hh"
-#include "mem/ruby/slicc_interface/RubyRequest.hh"
 #include "mem/ruby/system/System.hh"
 #include "mem/mem_object.hh"
 #include "mem/physical.hh"
@@ -115,17 +114,23 @@ class RubyPort : public MemObject
     Port *getPort(const std::string &if_name, int idx);
 
     virtual RequestStatus makeRequest(PacketPtr pkt) = 0;
+    virtual int outstandingCount() const = 0;
+    virtual bool isDeadlockEventScheduled() const = 0;
+    virtual void descheduleDeadlockEvent() = 0;
 
     //
     // Called by the controller to give the sequencer a pointer.
     // A pointer to the controller is needed for atomic support.
     //
     void setController(AbstractController* _cntrl) { m_controller = _cntrl; }
+    int getId() { return m_version; }
+    unsigned int drain(Event *de);
 
   protected:
     const std::string m_name;
     void ruby_hit_callback(PacketPtr pkt);
     void hit(PacketPtr pkt);
+    void testDrainComplete();
 
     int m_version;
     AbstractController* m_controller;
@@ -143,11 +148,19 @@ class RubyPort : public MemObject
         }
     }
 
+    unsigned int getDrainCount(Event *de);
+
     uint16_t m_port_id;
     uint64_t m_request_cnt;
 
     M5Port* physMemPort;
 
+    /*! Vector of CPU Port attached to this Ruby port. */
+    typedef std::vector<M5Port*>::iterator CpuPortIter;
+    std::vector<M5Port*> cpu_ports;
+
+    Event *drainEvent;
+
     PhysicalMemory* physmem;
     RubySystem* ruby_system;
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 7137dcc28..3f9ceb34d 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -40,9 +40,7 @@
 #include "mem/protocol/RubyAccessMode.hh"
 #include "mem/ruby/buffers/MessageBuffer.hh"
 #include "mem/ruby/common/Global.hh"
-#include "mem/ruby/common/SubBlock.hh"
 #include "mem/ruby/profiler/Profiler.hh"
-#include "mem/ruby/recorder/Tracer.hh"
 #include "mem/ruby/slicc_interface/RubyRequest.hh"
 #include "mem/ruby/system/CacheMemory.hh"
 #include "mem/ruby/system/Sequencer.hh"
@@ -521,7 +519,11 @@ Sequencer::hitCallback(SequencerRequest* srequest,
     }
 
     // update the data
-    if (pkt->getPtr<uint8_t>(true) != NULL) {
+    if (g_system_ptr->m_warmup_enabled) {
+        assert(pkt->getPtr<uint8_t>(false) != NULL);
+        data.setData(pkt->getPtr<uint8_t>(false),
+                     request_address.getOffset(), pkt->getSize());
+    } else if (pkt->getPtr<uint8_t>(true) != NULL) {
         if ((type == RubyRequestType_LD) ||
             (type == RubyRequestType_IFETCH) ||
             (type == RubyRequestType_RMW_Read) ||
@@ -553,8 +555,17 @@ Sequencer::hitCallback(SequencerRequest* srequest,
         testerSenderState->subBlock->mergeFrom(data);
     }
 
-    ruby_hit_callback(pkt);
     delete srequest;
+
+    if (g_system_ptr->m_warmup_enabled) {
+        delete pkt;
+        g_system_ptr->m_cache_recorder->enqueueNextFetchRequest();
+    } else if (g_system_ptr->m_cooldown_enabled) {
+        delete pkt;
+        g_system_ptr->m_cache_recorder->enqueueNextFlushRequest();
+    } else {
+        ruby_hit_callback(pkt);
+    }
 }
 
 bool
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index 7c2d0af13..4a6d46c01 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -39,8 +39,6 @@
 #include "mem/ruby/system/RubyPort.hh"
 
 class DataBlock;
-class CacheMsg;
-class MachineID;
 class CacheMemory;
 
 class RubySequencerParams;
@@ -100,6 +98,18 @@ class Sequencer : public RubyPort, public Consumer
 
     RequestStatus makeRequest(PacketPtr pkt);
     bool empty() const;
+    int outstandingCount() const { return m_outstanding_count; }
+    bool
+    isDeadlockEventScheduled() const
+    {
+        return deadlockCheckEvent.scheduled();
+    }
+
+    void
+    descheduleDeadlockEvent()
+    {
+        deschedule(deadlockCheckEvent);
+    }
 
     void print(std::ostream& out) const;
     void printStats(std::ostream& out) const;
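With this change, hitCallback() routes a completed request three ways: warm-up packets came from the cache trace rather than a CPU, so they are freed and the next fetch is queued; cool-down flushes likewise; only ordinary requests are returned via ruby_hit_callback(). A compact sketch of that dispatch, with stand-in types rather than gem5's:

```cpp
// Phase, Recorder and Packet are illustrative assumptions; the method
// names enqueueNextFetchRequest/enqueueNextFlushRequest mirror the patch.
enum class Phase { Normal, Warmup, Cooldown };

struct Recorder {
    void enqueueNextFetchRequest() { /* issue next warm-up load */ }
    void enqueueNextFlushRequest() { /* issue next cool-down flush */ }
};

template <typename Packet>
void routeCompletion(Phase phase, Recorder& rec, Packet* pkt,
                     void (*cpuCallback)(Packet*))
{
    switch (phase) {
      case Phase::Warmup:
        delete pkt;                      // trace-generated, no CPU waiting
        rec.enqueueNextFetchRequest();
        break;
      case Phase::Cooldown:
        delete pkt;
        rec.enqueueNextFlushRequest();
        break;
      case Phase::Normal:
        cpuCallback(pkt);                // the ruby_hit_callback(pkt) path
        break;
    }
}
```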
diff --git a/src/mem/ruby/system/SparseMemory.cc b/src/mem/ruby/system/SparseMemory.cc
index 8e4f37c46..db8d494f8 100644
--- a/src/mem/ruby/system/SparseMemory.cc
+++ b/src/mem/ruby/system/SparseMemory.cc
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ * Copyright (c) 2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,6 +27,8 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <queue>
+
 #include "debug/RubyCache.hh"
 #include "mem/ruby/system/SparseMemory.hh"
 #include "mem/ruby/system/System.hh"
@@ -82,19 +85,19 @@ SparseMemory::recursivelyRemoveTables(SparseMapType* curTable, int curLevel)
     SparseMapType::iterator iter;
 
     for (iter = curTable->begin(); iter != curTable->end(); iter++) {
-        SparseMemEntry* entryStruct = &((*iter).second);
+        SparseMemEntry entry = (*iter).second;
 
         if (curLevel != (m_number_of_levels - 1)) {
             // If the not at the last level, analyze those lower level
             // tables first, then delete those next tables
-            SparseMapType* nextTable = (SparseMapType*)(entryStruct->entry);
+            SparseMapType* nextTable = (SparseMapType*)(entry);
             recursivelyRemoveTables(nextTable, (curLevel + 1));
             delete nextTable;
         } else {
             // If at the last level, delete the directory entry
-            delete (AbstractEntry*)(entryStruct->entry);
+            delete (AbstractEntry*)(entry);
         }
-        entryStruct->entry = NULL;
+        entry = NULL;
     }
 
     // Once all entries have been deleted, erase the entries
@@ -134,7 +137,7 @@ SparseMemory::exist(const Address& address) const
         // If the address is found, move on to the next level.
        // Otherwise, return not found
         if (curTable->count(curAddress) != 0) {
-            curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
+            curTable = (SparseMapType*)((*curTable)[curAddress]);
         } else {
             DPRINTF(RubyCache, "Not found\n");
             return false;
@@ -156,7 +159,6 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
 
     Address curAddress;
     SparseMapType* curTable = m_map_head;
-    SparseMemEntry* entryStruct = NULL;
 
     // Initiallize the high bit to be the total number of bits plus
     // the block offset.  However the highest bit index is one less
@@ -179,7 +181,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
         // if the address exists in the cur table, move on.  Otherwise
         // create a new table.
         if (curTable->count(curAddress) != 0) {
-            curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
+            curTable = (SparseMapType*)((*curTable)[curAddress]);
         } else {
             m_adds_per_level[level]++;
@@ -194,9 +196,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
 
             // Create the pointer container SparseMemEntry and add it
             // to the table.
-            entryStruct = new SparseMemEntry;
-            entryStruct->entry = newEntry;
-            (*curTable)[curAddress] = *entryStruct;
+            (*curTable)[curAddress] = newEntry;
 
             // Move to the next level of the heirarchy
             curTable = (SparseMapType*)newEntry;
@@ -215,7 +215,7 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
 {
     Address curAddress;
     CurNextInfo nextInfo;
-    SparseMemEntry* entryStruct;
+    SparseMemEntry entry;
 
     // create the appropriate address for this level
     // Note: that set Address is inclusive of the specified range,
@@ -231,11 +231,11 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
 
     assert(curInfo.curTable->count(curAddress) != 0);
 
-    entryStruct = &((*(curInfo.curTable))[curAddress]);
+    entry = (*(curInfo.curTable))[curAddress];
 
     if (curInfo.level < (m_number_of_levels - 1)) {
         // set up next level's info
-        nextInfo.curTable = (SparseMapType*)(entryStruct->entry);
+        nextInfo.curTable = (SparseMapType*)(entry);
         nextInfo.level = curInfo.level + 1;
 
         nextInfo.highBit = curInfo.highBit -
@@ -252,15 +252,15 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
         if (tableSize == 0) {
             m_removes_per_level[curInfo.level]++;
             delete nextInfo.curTable;
-            entryStruct->entry = NULL;
+            entry = NULL;
             curInfo.curTable->erase(curAddress);
         }
     } else {
         // if this is the last level, we have reached the Directory
         // Entry and thus we should delete it including the
         // SparseMemEntry container struct.
-        delete (AbstractEntry*)(entryStruct->entry);
-        entryStruct->entry = NULL;
+        delete (AbstractEntry*)(entry);
+        entry = NULL;
         curInfo.curTable->erase(curAddress);
         m_removes_per_level[curInfo.level]++;
     }
@@ -331,7 +331,7 @@ SparseMemory::lookup(const Address& address)
         // If the address is found, move on to the next level.
         // Otherwise, return not found
         if (curTable->count(curAddress) != 0) {
-            curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
+            curTable = (SparseMapType*)((*curTable)[curAddress]);
         } else {
             DPRINTF(RubyCache, "Not found\n");
             return NULL;
@@ -345,6 +345,70 @@ SparseMemory::lookup(const Address& address)
 }
 
 void
+SparseMemory::recordBlocks(int cntrl_id, CacheRecorder* tr) const
+{
+    queue<SparseMapType*> unexplored_nodes[2];
+    queue<physical_address_t> address_of_nodes[2];
+
+    unexplored_nodes[0].push(m_map_head);
+    address_of_nodes[0].push(0);
+
+    int parity_of_level = 0;
+    physical_address_t address, temp_address;
+    Address curAddress;
+
+    // Initiallize the high bit to be the total number of bits plus
+    // the block offset.  However the highest bit index is one less
+    // than this value.
+    int highBit = m_total_number_of_bits + RubySystem::getBlockSizeBits();
+    int lowBit;
+
+    for (int cur_level = 0; cur_level < m_number_of_levels; cur_level++) {
+
+        // create the appropriate address for this level
+        // Note: that set Address is inclusive of the specified range,
+        // thus the high bit is one less than the total number of bits
+        // used to create the address.
+        lowBit = highBit - m_number_of_bits_per_level[cur_level];
+
+        while (!unexplored_nodes[parity_of_level].empty()) {
+
+            SparseMapType* node = unexplored_nodes[parity_of_level].front();
+            unexplored_nodes[parity_of_level].pop();
+
+            address = address_of_nodes[parity_of_level].front();
+            address_of_nodes[parity_of_level].pop();
+
+            SparseMapType::iterator iter;
+
+            for (iter = node->begin(); iter != node->end(); iter++) {
+                SparseMemEntry entry = (*iter).second;
+                curAddress = (*iter).first;
+
+                if (cur_level != (m_number_of_levels - 1)) {
+                    // If not at the last level, put this node in the queue
+                    unexplored_nodes[1 - parity_of_level].push(
+                        (SparseMapType*)(entry));
+                    address_of_nodes[1 - parity_of_level].push(address |
+                        (curAddress.getAddress() << lowBit));
+                } else {
+                    // If at the last level, add a trace record
+                    temp_address = address | (curAddress.getAddress()
+                        << lowBit);
+                    DataBlock block = ((AbstractEntry*)entry)->getDataBlk();
+                    tr->addRecord(cntrl_id, temp_address, 0,
+                                  RubyRequestType_ST, 0, block);
+                }
+            }
+        }
+
+        // Adjust the highBit value for the next level
+        highBit -= m_number_of_bits_per_level[cur_level];
+        parity_of_level = 1 - parity_of_level;
+    }
+}
+
+void
 SparseMemory::print(ostream& out) const
 {
 }
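recordBlocks() walks the fixed-depth radix tree level by level: two queue sets alternate on level parity, and each level ORs its index bits into the accumulated physical address, so only leaves emit complete block addresses. A hedged sketch of the same traversal over a toy node type (Node stands in for SparseMapType):

```cpp
#include <cstdint>
#include <map>
#include <queue>
#include <utility>
#include <vector>

struct Node {
    std::map<uint64_t, Node*> kids;  // per-level radix entries
    uint64_t leaf_data = 0;          // meaningful at the last level only
};

// Emit (address, leaf_data) pairs, mirroring the parity-queue BFS.
void walk(Node* root, int levels, const int* bitsPerLevel, int highBit,
          std::vector<std::pair<uint64_t, uint64_t>>& out)
{
    std::queue<std::pair<Node*, uint64_t>> q[2];
    q[0].push({root, 0});
    int parity = 0;

    for (int level = 0; level < levels; ++level) {
        int lowBit = highBit - bitsPerLevel[level];
        while (!q[parity].empty()) {
            auto [node, addr] = q[parity].front();
            q[parity].pop();
            for (auto& [index, child] : node->kids) {
                uint64_t next = addr | (index << lowBit);
                if (level != levels - 1)
                    q[1 - parity].push({child, next});       // defer a level
                else
                    out.push_back({next, child->leaf_data}); // leaf record
            }
        }
        highBit -= bitsPerLevel[level];  // deeper levels use lower bits
        parity = 1 - parity;
    }
}
```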
diff --git a/src/mem/ruby/system/SparseMemory.hh b/src/mem/ruby/system/SparseMemory.hh
index f6937ef54..e4237dbcd 100644
--- a/src/mem/ruby/system/SparseMemory.hh
+++ b/src/mem/ruby/system/SparseMemory.hh
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ * Copyright (c) 2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -32,15 +33,11 @@
 #include <iostream>
 
 #include "base/hashmap.hh"
-#include "mem/ruby/slicc_interface/AbstractEntry.hh"
 #include "mem/ruby/common/Address.hh"
-#include "mem/ruby/common/Global.hh"
-
-struct SparseMemEntry
-{
-    void* entry;
-};
+#include "mem/ruby/recorder/CacheRecorder.hh"
+#include "mem/ruby/slicc_interface/AbstractEntry.hh"
 
+typedef void* SparseMemEntry;
 typedef m5::hash_map<Address, SparseMemEntry> SparseMapType;
 
 struct CurNextInfo
@@ -63,6 +60,14 @@ class SparseMemory
     void add(const Address& address, AbstractEntry*);
     void remove(const Address& address);
 
+    /*!
+     * Function for recording the contents of memory. This function walks
+     * through all the levels of the sparse memory in a breadth first
+     * fashion. This might need more memory than a depth first approach.
+     * But breadth first seems easier to me than a depth first approach.
+     */
+    void recordBlocks(int cntrl_id, CacheRecorder *) const;
+
     AbstractEntry* lookup(const Address& address);
 
     // Print cache contents
@@ -95,12 +100,4 @@ class SparseMemory
     uint64_t* m_removes_per_level;
 };
 
-inline std::ostream&
-operator<<(std::ostream& out, const SparseMemEntry& obj)
-{
-    out << "SparseMemEntry";
-    out << std::flush;
-    return out;
-}
-
 #endif // __MEM_RUBY_SYSTEM_SPARSEMEMORY_HH__
diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc
index 81824b9b7..6f191819b 100644
--- a/src/mem/ruby/system/System.cc
+++ b/src/mem/ruby/system/System.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,16 +26,19 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <fcntl.h>
+#include <zlib.h>
+
+#include <cstdio>
+
 #include "base/intmath.hh"
 #include "base/output.hh"
-#include "mem/ruby/buffers/MessageBuffer.hh"
+#include "debug/RubySystem.hh"
 #include "mem/ruby/common/Address.hh"
 #include "mem/ruby/network/Network.hh"
 #include "mem/ruby/profiler/Profiler.hh"
-#include "mem/ruby/recorder/Tracer.hh"
-#include "mem/ruby/slicc_interface/AbstractController.hh"
-#include "mem/ruby/system/MemoryVector.hh"
 #include "mem/ruby/system/System.hh"
+#include "sim/simulate.hh"
 
 using namespace std;
 
@@ -49,7 +52,6 @@ int RubySystem::m_memory_size_bits;
 
 Network* RubySystem::m_network_ptr;
 Profiler* RubySystem::m_profiler_ptr;
-Tracer* RubySystem::m_tracer_ptr;
 MemoryVector* RubySystem::m_mem_vec_ptr;
 
 RubySystem::RubySystem(const Params *p)
@@ -88,6 +90,8 @@ RubySystem::RubySystem(const Params *p)
     //
     RubyExitCallback* rubyExitCB = new RubyExitCallback(p->stats_filename);
     registerExitCallback(rubyExitCB);
+    m_warmup_enabled = false;
+    m_cooldown_enabled = false;
 }
 
 void
@@ -109,22 +113,21 @@ RubySystem::registerProfiler(Profiler* profiler_ptr)
 }
 
 void
-RubySystem::registerTracer(Tracer* tracer_ptr)
+RubySystem::registerAbstractController(AbstractController* cntrl)
 {
-    m_tracer_ptr = tracer_ptr;
+    m_abs_cntrl_vec.push_back(cntrl);
 }
 
 void
-RubySystem::registerAbstractController(AbstractController* cntrl)
+RubySystem::registerSparseMemory(SparseMemory* s)
 {
-    m_abs_cntrl_vec.push_back(cntrl);
+    m_sparse_memory_vector.push_back(s);
 }
 
 RubySystem::~RubySystem()
 {
     delete m_network_ptr;
     delete m_profiler_ptr;
-    delete m_tracer_ptr;
     if (m_mem_vec_ptr)
         delete m_mem_vec_ptr;
 }
@@ -167,9 +170,143 @@ RubySystem::printStats(ostream& out)
 }
 
 void
+RubySystem::writeCompressedTrace(uint8* raw_data, string filename,
+                                 uint64 uncompressed_trace_size)
+{
+    // Create the checkpoint file for the memory
+    string thefile = Checkpoint::dir() + "/" + filename.c_str();
+
+    int fd = creat(thefile.c_str(), 0664);
+    if (fd < 0) {
+        perror("creat");
+        fatal("Can't open memory trace file '%s'\n", filename);
+    }
+
+    gzFile compressedMemory = gzdopen(fd, "wb");
+    if (compressedMemory == NULL)
+        fatal("Insufficient memory to allocate compression state for %s\n",
+              filename);
+
+    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
+        uncompressed_trace_size) {
+        fatal("Write failed on memory trace file '%s'\n", filename);
+    }
+
+    if (gzclose(compressedMemory)) {
+        fatal("Close failed on memory trace file '%s'\n", filename);
+    }
+    delete raw_data;
+}
+
+void
 RubySystem::serialize(std::ostream &os)
 {
+    m_cooldown_enabled = true;
+
+    vector<Sequencer*> sequencer_map;
+    Sequencer* sequencer_ptr = NULL;
+    int cntrl_id = -1;
+
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
+        if (sequencer_ptr == NULL) {
+            sequencer_ptr = sequencer_map[cntrl];
+            cntrl_id = cntrl;
+        }
+    }
+
+    assert(sequencer_ptr != NULL);
+
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        if (sequencer_map[cntrl] == NULL) {
+            sequencer_map[cntrl] = sequencer_ptr;
+        }
+    }
+
+    // Create the CacheRecorder and record the cache trace
+    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);
+
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
+    }
+
+    // save the current tick value
+    Tick curtick_original = curTick();
+    // save the event queue head
+    Event* eventq_head = eventq->replaceHead(NULL);
+
+    // Schedule an event to start cache cooldown
+    RubyEvent* e = new RubyEvent(this);
+    schedule(e,curTick());
+    simulate();
+
+    // Restore eventq head
+    eventq_head = eventq->replaceHead(eventq_head);
+    // Restore curTick
+    curTick(curtick_original);
+
+    uint8* raw_data = NULL;
+
+    if (m_mem_vec_ptr != NULL) {
+        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);
+
+        string memory_trace_file = name() + ".memory.gz";
+        writeCompressedTrace(raw_data, memory_trace_file,
+                             memory_trace_size);
+
+        SERIALIZE_SCALAR(memory_trace_file);
+        SERIALIZE_SCALAR(memory_trace_size);
+
+    } else {
+        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
+            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
+                                                    m_cache_recorder);
+        }
+    }
+
+    // Aggergate the trace entries together into a single array
+    raw_data = new uint8_t[4096];
+    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
+                                                                 4096);
+    string cache_trace_file = name() + ".cache.gz";
+    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
+
+    SERIALIZE_SCALAR(cache_trace_file);
+    SERIALIZE_SCALAR(cache_trace_size);
+    m_cooldown_enabled = false;
+}
+
+void
+RubySystem::readCompressedTrace(string filename, uint8*& raw_data,
+                                uint64& uncompressed_trace_size)
+{
+    // Read the trace file
+    gzFile compressedTrace;
+
+    // trace file
+    int fd = open(filename.c_str(), O_RDONLY);
+    if (fd < 0) {
+        perror("open");
+        fatal("Unable to open trace file %s", filename);
+    }
+
+    compressedTrace = gzdopen(fd, "rb");
+    if (compressedTrace == NULL) {
+        fatal("Insufficient memory to allocate compression state for %s\n",
+              filename);
+    }
+
+    raw_data = new uint8_t[uncompressed_trace_size];
+    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
+            uncompressed_trace_size) {
+        fatal("Unable to read complete trace from file %s\n", filename);
+    }
+
+    if (gzclose(compressedTrace)) {
+        fatal("Failed to close cache trace file '%s'\n", filename);
+    }
 }
 
 void
@@ -181,18 +318,95 @@ RubySystem::unserialize(Checkpoint *cp, const string &section)
     // value of curTick()
     //
     clearStats();
+    uint8* uncompressed_trace = NULL;
+
+    if (m_mem_vec_ptr != NULL) {
+        string memory_trace_file;
+        uint64 memory_trace_size = 0;
+
+        UNSERIALIZE_SCALAR(memory_trace_file);
+        UNSERIALIZE_SCALAR(memory_trace_size);
+        memory_trace_file = cp->cptDir + "/" + memory_trace_file;
+
+        readCompressedTrace(memory_trace_file, uncompressed_trace,
+                            memory_trace_size);
+        m_mem_vec_ptr->populatePages(uncompressed_trace);
+
+        delete uncompressed_trace;
+        uncompressed_trace = NULL;
+    }
+
+    string cache_trace_file;
+    uint64 cache_trace_size = 0;
+
+    UNSERIALIZE_SCALAR(cache_trace_file);
+    UNSERIALIZE_SCALAR(cache_trace_size);
+    cache_trace_file = cp->cptDir + "/" + cache_trace_file;
+
+    readCompressedTrace(cache_trace_file, uncompressed_trace,
+                        cache_trace_size);
+    m_warmup_enabled = true;
+
+    vector<Sequencer*> sequencer_map;
+    Sequencer* t = NULL;
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
+        if(t == NULL) t = sequencer_map[cntrl];
+    }
+
+    assert(t != NULL);
+
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        if (sequencer_map[cntrl] == NULL) {
+            sequencer_map[cntrl] = t;
+        }
+    }
+
+    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
+                                         sequencer_map);
 }
 
 void
-RubySystem::clearStats() const
+RubySystem::startup()
 {
-    m_profiler_ptr->clearStats();
-    m_network_ptr->clearStats();
+    if (m_warmup_enabled) {
+        // save the current tick value
+        Tick curtick_original = curTick();
+        // save the event queue head
+        Event* eventq_head = eventq->replaceHead(NULL);
+        // set curTick to 0
+        curTick(0);
+
+        // Schedule an event to start cache warmup
+        RubyEvent* e = new RubyEvent(this);
+        schedule(e,curTick());
+        simulate();
+
+        delete m_cache_recorder;
+        m_cache_recorder = NULL;
+        m_warmup_enabled = false;
+        // Restore eventq head
+        eventq_head = eventq->replaceHead(eventq_head);
+        // Restore curTick
+        curTick(curtick_original);
+    }
+}
+
+void
+RubySystem::RubyEvent::process()
+{
+    if (ruby_system->m_warmup_enabled) {
+        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
+    } else if (ruby_system->m_cooldown_enabled) {
+        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
+    }
 }
 
 void
-RubySystem::recordCacheContents(CacheRecorder& tr) const
+RubySystem::clearStats() const
 {
+    m_profiler_ptr->clearStats();
+    m_network_ptr->clearStats();
 }
 
 #ifdef CHECK_COHERENCE
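writeCompressedTrace()/readCompressedTrace() wrap zlib's gz* file API; the reader cannot discover the uncompressed length on its own, which is why serialize() stores the trace size in the checkpoint next to the file name. A pared-down standalone version of the same pattern, using gzopen in place of the patch's creat()/open() plus gzdopen:

```cpp
// Minimal sketch: error reporting is reduced to a bool, unlike the
// patch's fatal() calls.
#include <cstdint>
#include <vector>
#include <zlib.h>

bool writeCompressed(const char* path, const uint8_t* data, unsigned len)
{
    gzFile f = gzopen(path, "wb");
    if (f == NULL)
        return false;
    bool ok = gzwrite(f, data, len) == static_cast<int>(len);
    return gzclose(f) == Z_OK && ok;     // always close, then report
}

bool readCompressed(const char* path, std::vector<uint8_t>& out, unsigned len)
{
    out.resize(len);                     // size comes from the checkpoint
    gzFile f = gzopen(path, "rb");
    if (f == NULL)
        return false;
    bool ok = gzread(f, out.data(), len) == static_cast<int>(len);
    return gzclose(f) == Z_OK && ok;
}
```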
diff --git a/src/mem/ruby/system/System.hh b/src/mem/ruby/system/System.hh
index 704cc3b27..461abffe2 100644
--- a/src/mem/ruby/system/System.hh
+++ b/src/mem/ruby/system/System.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -38,21 +38,34 @@
 #include "base/callback.hh"
 #include "mem/ruby/common/Global.hh"
 #include "mem/ruby/eventqueue/RubyEventQueue.hh"
-#include "mem/ruby/system/RubyPort.hh"
+#include "mem/ruby/recorder/CacheRecorder.hh"
 #include "mem/ruby/slicc_interface/AbstractController.hh"
+#include "mem/ruby/system/MemoryVector.hh"
+#include "mem/ruby/system/SparseMemory.hh"
 #include "params/RubySystem.hh"
 #include "sim/sim_object.hh"
 
-class AbstractController;
-class CacheRecorder;
-class MemoryVector;
 class Network;
 class Profiler;
-class Tracer;
 
 class RubySystem : public SimObject
 {
   public:
+    class RubyEvent : public Event
+    {
+      public:
+        RubyEvent(RubySystem* _ruby_system)
+        {
+            ruby_system = _ruby_system;
+        }
+      private:
+        void process();
+
+        RubySystem* ruby_system;
+    };
+
+    friend class RubyEvent;
+
     typedef RubySystemParams Params;
     RubySystem(const Params *p);
     ~RubySystem();
@@ -86,13 +99,6 @@ class RubySystem : public SimObject
         return m_profiler_ptr;
     }
 
-    static Tracer*
-    getTracer()
-    {
-        assert(m_tracer_ptr != NULL);
-        return m_tracer_ptr;
-    }
-
     static MemoryVector*
     getMemoryVector()
     {
@@ -100,7 +106,6 @@ class RubySystem : public SimObject
         return m_mem_vec_ptr;
     }
 
-    void recordCacheContents(CacheRecorder& tr) const;
     static void printConfig(std::ostream& out);
     static void printStats(std::ostream& out);
     void clearStats() const;
@@ -114,13 +119,15 @@ class RubySystem : public SimObject
 
     void print(std::ostream& out) const;
 
-    virtual void serialize(std::ostream &os);
-    virtual void unserialize(Checkpoint *cp, const std::string &section);
+    void serialize(std::ostream &os);
+    void unserialize(Checkpoint *cp, const std::string &section);
+    void process();
+    void startup();
 
     void registerNetwork(Network*);
     void registerProfiler(Profiler*);
-    void registerTracer(Tracer*);
     void registerAbstractController(AbstractController*);
+    void registerSparseMemory(SparseMemory*);
 
   private:
     // Private copy constructor and assignment operator
@@ -130,6 +137,11 @@ class RubySystem : public SimObject
     void init();
 
     static void printSystemConfig(std::ostream& out);
+    void readCompressedTrace(std::string filename,
+                             uint8*& raw_data,
+                             uint64& uncompressed_trace_size);
+    void writeCompressedTrace(uint8* raw_data, std::string file,
+                              uint64 uncompressed_trace_size);
 
   private:
     // configuration parameters
@@ -140,14 +152,16 @@ class RubySystem : public SimObject
     static int m_block_size_bits;
     static uint64 m_memory_size_bytes;
     static int m_memory_size_bits;
-
     static Network* m_network_ptr;
 
   public:
     static Profiler* m_profiler_ptr;
-    static Tracer* m_tracer_ptr;
     static MemoryVector* m_mem_vec_ptr;
     std::vector<AbstractController*> m_abs_cntrl_vec;
+    bool m_warmup_enabled;
+    bool m_cooldown_enabled;
+    CacheRecorder* m_cache_recorder;
+    std::vector<SparseMemory*> m_sparse_memory_vector;
 };
 
 inline std::ostream&
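Both serialize() and startup() rely on the same trick visible in System.cc: swap the real event-queue head out with replaceHead(), run a private simulate() that only processes the warm-up or cool-down RubyEvents, then restore the head and curTick(). A toy illustration of that pattern, with a plain container standing in for gem5's EventQueue:

```cpp
// Illustration only: MiniSim is not gem5's EventQueue, and "events" here
// are plain callables rather than scheduled Event objects.
#include <cstdint>
#include <deque>
#include <functional>

struct MiniSim {
    uint64_t tick = 0;
    std::deque<std::function<void()>> events;

    void simulate()                        // run until no events remain
    {
        while (!events.empty()) {
            auto e = events.front();
            events.pop_front();
            e();
            ++tick;
        }
    }
};

void runPrivatePhase(MiniSim& sim, std::function<void()> kickoff)
{
    uint64_t savedTick = sim.tick;                  // curtick_original
    std::deque<std::function<void()>> saved;        // replaceHead(NULL)
    saved.swap(sim.events);

    sim.events.push_back(kickoff);                  // schedule(e, curTick())
    sim.simulate();                                 // drain the private phase

    sim.events.swap(saved);                         // restore the real queue
    sim.tick = savedTick;                           // restore curTick
}
```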