From aa86800e7a142f41a8fe957c367c133dea8d61bf Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Tue, 23 Apr 2013 00:03:02 -0500 Subject: ruby: patch checkpoint restore with garnet Due to recent changes to the clocking system in Ruby and the way Ruby restores state from a checkpoint, garnet was failing to run from a checkpointed state. The problem is that Ruby resets the time to zero while warming up the caches. If any component records a local copy of the time (read: calls curCycle()) before the simulation has started, then that component will not operate until that time is reached. In the context of this particular patch, the Garnet Network class calls curCycle() at multiple places. Any non-operational component can block requests in the memory system, which the system interprets as a deadlock. This patch makes changes so that Garnet can successfully run from a checkpointed state. It adds a globally visible time at which the actual execution started. This time is initialized in the RubySystem::startup() function. This variable is only meant for components within Ruby. This replaces the private variable that was maintained within Garnet since it is not possible to figure out the correct time when the value of this variable can be set. The patch also does away with all cases where curCycle() is called within some Ruby component before the system has actually started executing. This is required due to the quirky manner in which Ruby restores from a checkpoint. 
--- src/mem/ruby/system/System.cc | 34 +++++++++++++++++++++------------- src/mem/ruby/system/System.hh | 2 -- 2 files changed, 21 insertions(+), 15 deletions(-) (limited to 'src/mem/ruby/system') diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc index 617788b99..357511127 100644 --- a/src/mem/ruby/system/System.cc +++ b/src/mem/ruby/system/System.cc @@ -93,13 +93,6 @@ RubySystem::RubySystem(const Params *p) g_abs_controls.resize(MachineType_NUM); } -void -RubySystem::init() -{ - m_profiler_ptr->clearStats(); - m_network_ptr->clearStats(); -} - void RubySystem::registerNetwork(Network* network_ptr) { @@ -311,12 +304,6 @@ RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, void RubySystem::unserialize(Checkpoint *cp, const string &section) { - // - // The main purpose for clearing stats in the unserialize process is so - // that the profiler can correctly set its start time to the unserialized - // value of curTick() - // - resetStats(); uint8_t *uncompressed_trace = NULL; if (m_mem_vec_ptr != NULL) { @@ -368,6 +355,23 @@ RubySystem::unserialize(Checkpoint *cp, const string &section) void RubySystem::startup() { + + // Ruby restores state from a checkpoint by resetting the clock to 0 and + // playing the requests that can possibly re-generate the cache state. + // The clock value is set to the actual checkpointed value once all the + // requests have been executed. + // + // This way of restoring state is pretty finicky. For example, if a + // Ruby component reads time before the state has been restored, it would + // cache this value and hence its clock would not be reset to 0, when + // Ruby resets the global clock. This can potentially result in a + // deadlock. + // + // The solution is that no Ruby component should read time before the + // simulation starts. And then one also needs to hope that the time + // Ruby finishes restoring the state is less than the time when the + // state was checkpointed. 
+ if (m_warmup_enabled) { // save the current tick value Tick curtick_original = curTick(); @@ -397,6 +401,8 @@ RubySystem::startup() setCurTick(curtick_original); resetClock(); } + + resetStats(); } void @@ -417,6 +423,8 @@ RubySystem::resetStats() for (uint32_t cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { m_abs_cntrl_vec[cntrl]->clearStats(); } + + g_ruby_start = curCycle(); } bool diff --git a/src/mem/ruby/system/System.hh b/src/mem/ruby/system/System.hh index 1e0be6da7..ea55a23e0 100644 --- a/src/mem/ruby/system/System.hh +++ b/src/mem/ruby/system/System.hh @@ -134,8 +134,6 @@ class RubySystem : public ClockedObject RubySystem(const RubySystem& obj); RubySystem& operator=(const RubySystem& obj); - void init(); - void readCompressedTrace(std::string filename, uint8_t *&raw_data, uint64& uncompressed_trace_size); -- cgit v1.2.3