From 93f2f69657d0a2420a2c86b71505e6d27e6e2a38 Mon Sep 17 00:00:00 2001 From: Daniel Sanchez Date: Mon, 11 May 2009 10:38:46 -0700 Subject: ruby: Working M5 interface and updated Ruby interface. This changeset also includes a lot of work from Derek Hower. RubyMemory is now both a driver for Ruby and a port for M5. Changed makeRequest/hitCallback interface. Brought packets (superficially) into the sequencer. Modified tester infrastructure to be packet based. M5 and Ruby can be used together through the example ruby_se.py script. SPARC parallel applications work, and the timing *seems* right from combined M5/Ruby debug traces. To run, % build/ALPHA_SE/m5.debug configs/example/ruby_se.py -c tests/test-progs/hello/bin/alpha/linux/hello -n 4 -t --- src/mem/ruby/system/CacheMemory.hh | 13 +++++-- src/mem/ruby/system/Sequencer.cc | 58 ++++++++++++++++++++-------- src/mem/ruby/system/Sequencer.hh | 8 ++-- src/mem/ruby/system/StoreBuffer.cc | 16 ++++---- src/mem/ruby/system/StoreBuffer.hh | 5 ++- src/mem/ruby/system/System.cc | 79 ++++++++++++++++++++++---------------- src/mem/ruby/system/System.hh | 4 ++ 7 files changed, 117 insertions(+), 66 deletions(-) (limited to 'src/mem/ruby/system') diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh index 4217a8685..a8306c06f 100644 --- a/src/mem/ruby/system/CacheMemory.hh +++ b/src/mem/ruby/system/CacheMemory.hh @@ -42,7 +42,9 @@ #include "mem/ruby/common/Global.hh" #include "mem/protocol/AccessPermission.hh" #include "mem/ruby/common/Address.hh" -#include "mem/ruby/recorder/CacheRecorder.hh" + +//dsm: PRUNED +//#include "mem/ruby/recorder/CacheRecorder.hh" #include "mem/protocol/CacheRequestType.hh" #include "mem/gems_common/Vector.hh" #include "mem/ruby/common/DataBlock.hh" @@ -142,6 +144,8 @@ private: int m_cache_num_sets; int m_cache_num_set_bits; int m_cache_assoc; + + bool is_locked; // for LL/SC }; // Output operator declaration @@ -489,7 +493,10 @@ template inline void 
CacheMemory::recordCacheContents(CacheRecorder& tr) const { - for (int i = 0; i < m_cache_num_sets; i++) { +//dsm: Uses CacheRecorder, PRUNED +assert(false); + +/* for (int i = 0; i < m_cache_num_sets; i++) { for (int j = 0; j < m_cache_assoc; j++) { AccessPermission perm = m_cache[i][j].m_Permission; CacheRequestType request_type = CacheRequestType_NULL; @@ -508,7 +515,7 @@ void CacheMemory::recordCacheContents(CacheRecorder& tr) const Address(0), request_type, m_replacementPolicy_ptr->getLastAccess(i, j)); } } - } + }*/ } template diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index fa5b75eb3..82eef2901 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -46,6 +46,7 @@ #include "mem/ruby/common/SubBlock.hh" #include "mem/protocol/Protocol.hh" #include "mem/gems_common/Map.hh" +#include "mem/packet.hh" Sequencer::Sequencer(AbstractChip* chip_ptr, int version) { m_chip_ptr = chip_ptr; @@ -58,6 +59,8 @@ Sequencer::Sequencer(AbstractChip* chip_ptr, int version) { m_writeRequestTable_ptr = new Map*[smt_threads]; m_readRequestTable_ptr = new Map*[smt_threads]; + m_packetTable_ptr = new Map; + for(int p=0; p < smt_threads; ++p){ m_writeRequestTable_ptr[p] = new Map; m_readRequestTable_ptr[p] = new Map; @@ -603,7 +606,8 @@ void Sequencer::hitCallback(const CacheMsg& request, DataBlock& data, GenericMac (type == CacheRequestType_ATOMIC); if (TSO && write) { - m_chip_ptr->m_L1Cache_storeBuffer_vec[m_version]->callBack(line_address(request.getAddress()), data); + m_chip_ptr->m_L1Cache_storeBuffer_vec[m_version]->callBack(line_address(request.getAddress()), data, + m_packetTable_ptr->lookup(request.getAddress())); } else { // Copy the correct bytes out of the cache line into the subblock @@ -616,7 +620,23 @@ void Sequencer::hitCallback(const CacheMsg& request, DataBlock& data, GenericMac } // Call into the Driver and let it read and/or modify the sub-block - 
g_system_ptr->getDriver()->hitCallback(m_chip_ptr->getID()*RubyConfig::numberOfProcsPerChip()+m_version, subblock, type, threadID); + Packet* pkt = m_packetTable_ptr->lookup(request.getAddress()); + + // update data if this is a store/atomic + + /* + if (pkt->req->isCondSwap()) { + L1Cache_Entry entry = m_L1Cache_vec[m_version]->lookup(Address(pkt->req->physAddr())); + DataBlk datablk = entry->getDataBlk(); + uint8_t *orig_data = datablk.getArray(); + if ( datablk.equal(pkt->req->getExtraData()) ) + datablk->setArray(pkt->getData()); + pkt->setData(orig_data); + } + */ + + g_system_ptr->getDriver()->hitCallback(pkt); + m_packetTable_ptr->remove(request.getAddress()); // If the request was a Store or Atomic, apply the changes in the SubBlock to the DataBlock // (This is only triggered for the non-TSO case) @@ -632,6 +652,7 @@ void Sequencer::printDebug(){ g_system_ptr->getDriver()->printDebug(); } +//dsm: breaks build, delayed // Returns true if the sequencer already has a load or store outstanding bool Sequencer::isReady(const Packet* pkt) const @@ -665,7 +686,7 @@ Sequencer::isReady(const Packet* pkt) const Address(logical_addr), // Virtual Address thread // SMT thread ); - isReady(request); + return isReady(request); } bool @@ -701,26 +722,36 @@ Sequencer::isReady(const CacheMsg& request) const return true; } -// Called by Driver +//dsm: breaks build, delayed +// Called by Driver (Simics or Tester). 
void -Sequencer::makeRequest(const Packet* pkt, void* data) +Sequencer::makeRequest(Packet* pkt) { int cpu_number = pkt->req->contextId(); la_t logical_addr = pkt->req->getVaddr(); pa_t physical_addr = pkt->req->getPaddr(); int request_size = pkt->getSize(); CacheRequestType type_of_request; + PrefetchBit prefetch; + bool write = false; if ( pkt->req->isInstFetch() ) { type_of_request = CacheRequestType_IFETCH; } else if ( pkt->req->isLocked() || pkt->req->isSwap() ) { type_of_request = CacheRequestType_ATOMIC; + write = true; } else if ( pkt->isRead() ) { type_of_request = CacheRequestType_LD; } else if ( pkt->isWrite() ) { type_of_request = CacheRequestType_ST; + write = true; } else { assert(false); } + if (pkt->req->isPrefetch()) { + prefetch = PrefetchBit_Yes; + } else { + prefetch = PrefetchBit_No; + } la_t virtual_pc = pkt->req->getPC(); int isPriv = false; // TODO: get permission data int thread = pkt->req->threadId(); @@ -733,28 +764,21 @@ Sequencer::makeRequest(const Packet* pkt, void* data) Address(virtual_pc), access_mode, // User/supervisor mode request_size, // Size in bytes of request - PrefetchBit_No, // Not a prefetch + prefetch, 0, // Version number Address(logical_addr), // Virtual Address thread // SMT thread ); - makeRequest(request); -} - -void -Sequencer::makeRequest(const CacheMsg& request) -{ - bool write = (request.getType() == CacheRequestType_ST) || - (request.getType() == CacheRequestType_ATOMIC); - if (TSO && (request.getPrefetch() == PrefetchBit_No) && write) { + if ( TSO && write && !pkt->req->isPrefetch() ) { assert(m_chip_ptr->m_L1Cache_storeBuffer_vec[m_version]->isReady()); - m_chip_ptr->m_L1Cache_storeBuffer_vec[m_version]->insertStore(request); + m_chip_ptr->m_L1Cache_storeBuffer_vec[m_version]->insertStore(pkt, request); return; } - bool hit = doRequest(request); + m_packetTable_ptr->insert(Address( physical_addr ), pkt); + doRequest(request); } bool Sequencer::doRequest(const CacheMsg& request) { diff --git 
a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index f4cc03131..d34a2fd3e 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -45,13 +45,13 @@ #include "mem/protocol/GenericMachineType.hh" #include "mem/protocol/PrefetchBit.hh" #include "mem/gems_common/Map.hh" -#include "mem/packet.hh" class DataBlock; class AbstractChip; class CacheMsg; class Address; class MachineID; +class Packet; class Sequencer : public Consumer { public: @@ -103,8 +103,7 @@ public: void printDebug(); // called by Tester or Simics - void makeRequest(const Packet* pkt, void* data); - void makeRequest(const CacheMsg& request); // depricate this function + void makeRequest(Packet* pkt); bool doRequest(const CacheMsg& request); void issueRequest(const CacheMsg& request); bool isReady(const Packet* pkt) const; @@ -143,6 +142,9 @@ private: // One request table per SMT thread Map** m_writeRequestTable_ptr; Map** m_readRequestTable_ptr; + + Map* m_packetTable_ptr; + // Global outstanding request count, across all request tables int m_outstanding_count; bool m_deadlock_check_scheduled; diff --git a/src/mem/ruby/system/StoreBuffer.cc b/src/mem/ruby/system/StoreBuffer.cc index 7f43771f3..280decdd8 100644 --- a/src/mem/ruby/system/StoreBuffer.cc +++ b/src/mem/ruby/system/StoreBuffer.cc @@ -44,6 +44,7 @@ #include "mem/ruby/system/Sequencer.hh" #include "mem/ruby/common/SubBlock.hh" #include "mem/ruby/profiler/Profiler.hh" +#include "mem/packet.hh" // *** Begin Helper class *** struct StoreBufferEntry { @@ -150,7 +151,8 @@ void StoreBuffer::printConfig(ostream& out) // Handle an incoming store request, this method is responsible for // calling hitCallback as needed -void StoreBuffer::insertStore(const CacheMsg& request) +void +StoreBuffer::insertStore(Packet* pkt, const CacheMsg& request) { Address addr = request.getAddress(); CacheRequestType type = request.getType(); @@ -173,7 +175,7 @@ void StoreBuffer::insertStore(const CacheMsg& request) // 
Perform the hit-callback for the store SubBlock subblock(addr, size); if(type == CacheRequestType_ST) { - g_system_ptr->getDriver()->hitCallback(m_chip_ptr->getID(), subblock, type, threadID); + g_system_ptr->getDriver()->hitCallback(pkt); assert(subblock.getSize() != 0); } else { // wait to perform the hitCallback until later for Atomics @@ -181,9 +183,9 @@ void StoreBuffer::insertStore(const CacheMsg& request) // Perform possible pre-fetch if(!isEmpty()) { - CacheMsg new_request = request; - new_request.getPrefetch() = PrefetchBit_Yes; - m_chip_ptr->getSequencer(m_version)->makeRequest(new_request); + Packet new_pkt(pkt); + pkt->req->setFlags(Request::PREFETCH); + m_chip_ptr->getSequencer(m_version)->makeRequest(&new_pkt); } // Update the StoreCache @@ -200,7 +202,7 @@ void StoreBuffer::insertStore(const CacheMsg& request) processHeadOfQueue(); } -void StoreBuffer::callBack(const Address& addr, DataBlock& data) +void StoreBuffer::callBack(const Address& addr, DataBlock& data, Packet* pkt) { DEBUG_MSG(STOREBUFFER_COMP, MedPrio, "callBack"); DEBUG_EXPR(STOREBUFFER_COMP, MedPrio, g_eventQueue_ptr->getTime()); @@ -220,7 +222,7 @@ void StoreBuffer::callBack(const Address& addr, DataBlock& data) } else { // We waited to perform the hitCallback until now for Atomics peek().m_subblock.mergeFrom(data); // copy the correct bytes from DataBlock into the SubBlock for the Load part of the atomic Load/Store - g_system_ptr->getDriver()->hitCallback(m_chip_ptr->getID(), peek().m_subblock, type, threadID); + g_system_ptr->getDriver()->hitCallback(pkt); m_seen_atomic = false; /// FIXME - record the time spent in the store buffer - split out ST vs ATOMIC diff --git a/src/mem/ruby/system/StoreBuffer.hh b/src/mem/ruby/system/StoreBuffer.hh index 2fae52643..2c9283f4b 100644 --- a/src/mem/ruby/system/StoreBuffer.hh +++ b/src/mem/ruby/system/StoreBuffer.hh @@ -49,6 +49,7 @@ class DataBlock; class SubBlock; class StoreBufferEntry; class AbstractChip; +class Packet; template class Vector; 
@@ -62,8 +63,8 @@ public: // Public Methods void wakeup(); // Used only for deadlock detection - void callBack(const Address& addr, DataBlock& data); - void insertStore(const CacheMsg& request); + void callBack(const Address& addr, DataBlock& data, Packet* pkt); + void insertStore(Packet* pkt, const CacheMsg& request); void updateSubBlock(SubBlock& sub_block) const { m_store_cache.update(sub_block); } bool trySubBlock(const SubBlock& sub_block) const { assert(isReady()); return m_store_cache.check(sub_block); } void print(ostream& out) const; diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc index ae77d2a85..877a894fc 100644 --- a/src/mem/ruby/system/System.cc +++ b/src/mem/ruby/system/System.cc @@ -46,12 +46,48 @@ #include "mem/protocol/Chip.hh" //#include "mem/ruby/recorder/Tracer.hh" #include "mem/protocol/Protocol.hh" -//#include "XactIsolationChecker.hh" // gem5:Arka for decomissioning of log_tm -//#include "XactCommitArbiter.hh" -//#include "XactVisualizer.hh" -#include "mem/ruby/interfaces/M5Driver.hh" RubySystem::RubySystem() +{ + init(); + m_preinitialized_driver = false; + createDriver(); + + /* gem5:Binkert for decomissiong of tracer + m_tracer_ptr = new Tracer; + */ + + /* gem5:Arka for decomissiong of log_tm + if (XACT_MEMORY) { + m_xact_isolation_checker = new XactIsolationChecker; + m_xact_commit_arbiter = new XactCommitArbiter; + m_xact_visualizer = new XactVisualizer; + } +*/ +} + +RubySystem::RubySystem(Driver* _driver) +{ + init(); + m_preinitialized_driver = true; + m_driver_ptr = _driver; +} + +RubySystem::~RubySystem() +{ + for (int i = 0; i < m_chip_vector.size(); i++) { + delete m_chip_vector[i]; + } + if (!m_preinitialized_driver) + delete m_driver_ptr; + delete m_network_ptr; + delete m_profiler_ptr; + /* gem5:Binkert for decomissiong of tracer + delete m_tracer_ptr; + */ +} + +void RubySystem::init() { DEBUG_MSG(SYSTEM_COMP, MedPrio,"initializing"); @@ -101,44 +137,19 @@ RubySystem::RubySystem() } } #endif + 
DEBUG_MSG(SYSTEM_COMP, MedPrio,"finished initializing"); + DEBUG_NEWLINE(SYSTEM_COMP, MedPrio); +} +void RubySystem::createDriver() +{ if (g_SYNTHETIC_DRIVER && !g_DETERMINISTIC_DRIVER) { cerr << "Creating Synthetic Driver" << endl; m_driver_ptr = new SyntheticDriver(this); } else if (!g_SYNTHETIC_DRIVER && g_DETERMINISTIC_DRIVER) { cerr << "Creating Deterministic Driver" << endl; m_driver_ptr = new DeterministicDriver(this); - } else { - cerr << "Creating M5 Driver" << endl; - m_driver_ptr = new M5Driver(this); } - /* gem5:Binkert for decomissiong of tracer - m_tracer_ptr = new Tracer; - */ - - /* gem5:Arka for decomissiong of log_tm - if (XACT_MEMORY) { - m_xact_isolation_checker = new XactIsolationChecker; - m_xact_commit_arbiter = new XactCommitArbiter; - m_xact_visualizer = new XactVisualizer; - } -*/ - DEBUG_MSG(SYSTEM_COMP, MedPrio,"finished initializing"); - DEBUG_NEWLINE(SYSTEM_COMP, MedPrio); - -} - -RubySystem::~RubySystem() -{ - for (int i = 0; i < m_chip_vector.size(); i++) { - delete m_chip_vector[i]; - } - delete m_driver_ptr; - delete m_network_ptr; - delete m_profiler_ptr; - /* gem5:Binkert for decomissiong of tracer - delete m_tracer_ptr; - */ } void RubySystem::printConfig(ostream& out) const diff --git a/src/mem/ruby/system/System.hh b/src/mem/ruby/system/System.hh index 12063eeed..8679b55c3 100644 --- a/src/mem/ruby/system/System.hh +++ b/src/mem/ruby/system/System.hh @@ -63,6 +63,7 @@ class RubySystem { public: // Constructors RubySystem(); + RubySystem(Driver* _driver); // used when driver is already instantiated (e.g. 
M5's RubyMem) // Destructor ~RubySystem(); @@ -98,6 +99,8 @@ public: private: // Private Methods + void init(); + void createDriver(); // Private copy constructor and assignment operator RubySystem(const RubySystem& obj); @@ -107,6 +110,7 @@ private: Network* m_network_ptr; Vector m_chip_vector; Profiler* m_profiler_ptr; + bool m_preinitialized_driver; Driver* m_driver_ptr; Tracer* m_tracer_ptr; XactIsolationChecker *m_xact_isolation_checker; -- cgit v1.2.3