diff options
Diffstat (limited to 'src/mem/ruby/system/RubySystem.cc')
-rw-r--r-- | src/mem/ruby/system/RubySystem.cc | 567 |
1 files changed, 567 insertions, 0 deletions
diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc new file mode 100644 index 000000000..454775178 --- /dev/null +++ b/src/mem/ruby/system/RubySystem.cc @@ -0,0 +1,567 @@ +/* + * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mem/ruby/system/RubySystem.hh" + +#include <fcntl.h> +#include <zlib.h> + +#include <cstdio> +#include <list> + +#include "base/intmath.hh" +#include "base/statistics.hh" +#include "debug/RubyCacheTrace.hh" +#include "debug/RubySystem.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/network/Network.hh" +#include "mem/simple_mem.hh" +#include "sim/eventq.hh" +#include "sim/simulate.hh" + +using namespace std; + +bool RubySystem::m_randomization; +uint32_t RubySystem::m_block_size_bytes; +uint32_t RubySystem::m_block_size_bits; +uint32_t RubySystem::m_memory_size_bits; +bool RubySystem::m_warmup_enabled = false; +// To look forward to allowing multiple RubySystem instances, track the number +// of RubySystems that need to be warmed up on checkpoint restore. +unsigned RubySystem::m_systems_to_warmup = 0; +bool RubySystem::m_cooldown_enabled = false; + +RubySystem::RubySystem(const Params *p) + : ClockedObject(p), m_access_backing_store(p->access_backing_store), + m_cache_recorder(NULL) +{ + m_randomization = p->randomization; + + m_block_size_bytes = p->block_size_bytes; + assert(isPowerOf2(m_block_size_bytes)); + m_block_size_bits = floorLog2(m_block_size_bytes); + m_memory_size_bits = p->memory_size_bits; + + // Resize to the size of different machine types + m_abstract_controls.resize(MachineType_NUM); + + // Collate the statistics before they are printed. + Stats::registerDumpCallback(new RubyStatsCallback(this)); + // Create the profiler + m_profiler = new Profiler(p, this); + m_phys_mem = p->phys_mem; +} + +void +RubySystem::registerNetwork(Network* network_ptr) +{ + m_network = network_ptr; +} + +void +RubySystem::registerAbstractController(AbstractController* cntrl) +{ + m_abs_cntrl_vec.push_back(cntrl); + + MachineID id = cntrl->getMachineID(); + m_abstract_controls[id.getType()][id.getNum()] = cntrl; +} + +RubySystem::~RubySystem() +{ + delete m_network; + delete m_profiler; +} + +void +RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace, + uint64_t cache_trace_size, + uint64_t block_size_bytes) +{ + vector<Sequencer*> sequencer_map; + Sequencer* sequencer_ptr = NULL; + + for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { + sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); + if (sequencer_ptr == NULL) { + sequencer_ptr = sequencer_map[cntrl]; + } + } + + assert(sequencer_ptr != NULL); + + for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { + if (sequencer_map[cntrl] == NULL) { + sequencer_map[cntrl] = sequencer_ptr; + } + } + + // Remove the old CacheRecorder if it's still hanging about. + if (m_cache_recorder != NULL) { + delete m_cache_recorder; + } + + // Create the CacheRecorder and record the cache trace + m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, + sequencer_map, block_size_bytes); +} + +void +RubySystem::memWriteback() +{ + m_cooldown_enabled = true; + + // Make the trace so we know what to write back. + DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); + makeCacheRecorder(NULL, 0, getBlockSizeBytes()); + for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { + m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); + } + DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); + + // save the current tick value + Tick curtick_original = curTick(); + DPRINTF(RubyCacheTrace, "Recording current tick %ld\n", curtick_original); + + // Deschedule all prior events on the event queue, but record the tick they + // were scheduled at so they can be restored correctly later. + list<pair<Event*, Tick> > original_events; + while (!eventq->empty()) { + Event *curr_head = eventq->getHead(); + if (curr_head->isAutoDelete()) { + DPRINTF(RubyCacheTrace, "Event %s auto-deletes when descheduled," + " not recording\n", curr_head->name()); + } else { + original_events.push_back(make_pair(curr_head, curr_head->when())); + } + eventq->deschedule(curr_head); + } + + // Schedule an event to start cache cooldown + DPRINTF(RubyCacheTrace, "Starting cache flush\n"); + enqueueRubyEvent(curTick()); + simulate(); + DPRINTF(RubyCacheTrace, "Cache flush complete\n"); + + // Deschedule any events left on the event queue. + while (!eventq->empty()) { + eventq->deschedule(eventq->getHead()); + } + + // Restore curTick + setCurTick(curtick_original); + + // Restore all events that were originally on the event queue. This is + // done after setting curTick back to its original value so that events do + // not seem to be scheduled in the past. + while (!original_events.empty()) { + pair<Event*, Tick> event = original_events.back(); + eventq->schedule(event.first, event.second); + original_events.pop_back(); + } + + // No longer flushing back to memory. + m_cooldown_enabled = false; + + // There are several issues with continuing simulation after calling + // memWriteback() at the moment, that stem from taking events off the + // queue, simulating again, and then putting them back on, whilst + // pretending that no time has passed. One is that some events will have + // been deleted, so can't be put back. Another is that any object + // recording the tick something happens may end up storing a tick in the + // future. A simple warning here alerts the user that things may not work + // as expected. + warn_once("Ruby memory writeback is experimental. Continuing simulation " + "afterwards may not always work as intended."); + + // Keep the cache recorder around so that we can dump the trace if a + // checkpoint is immediately taken. +} + +void +RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, + uint64_t uncompressed_trace_size) +{ + // Create the checkpoint file for the memory + string thefile = CheckpointIn::dir() + "/" + filename.c_str(); + + int fd = creat(thefile.c_str(), 0664); + if (fd < 0) { + perror("creat"); + fatal("Can't open memory trace file '%s'\n", filename); + } + + gzFile compressedMemory = gzdopen(fd, "wb"); + if (compressedMemory == NULL) + fatal("Insufficient memory to allocate compression state for %s\n", + filename); + + if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != + uncompressed_trace_size) { + fatal("Write failed on memory trace file '%s'\n", filename); + } + + if (gzclose(compressedMemory)) { + fatal("Close failed on memory trace file '%s'\n", filename); + } + delete[] raw_data; +} + +void +RubySystem::serialize(CheckpointOut &cp) const +{ + // Store the cache-block size, so we are able to restore on systems with a + // different cache-block size. CacheRecorder depends on the correct + // cache-block size upon unserializing. + uint64_t block_size_bytes = getBlockSizeBytes(); + SERIALIZE_SCALAR(block_size_bytes); + + // Check that there's a valid trace to use. If not, then memory won't be + // up-to-date and the simulation will probably fail when restoring from the + // checkpoint. + if (m_cache_recorder == NULL) { + fatal("Call memWriteback() before serialize() to create ruby trace"); + } + + // Aggregate the trace entries together into a single array + uint8_t *raw_data = new uint8_t[4096]; + uint64_t cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, + 4096); + string cache_trace_file = name() + ".cache.gz"; + writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); + + SERIALIZE_SCALAR(cache_trace_file); + SERIALIZE_SCALAR(cache_trace_size); +} + +void +RubySystem::drainResume() +{ + // Delete the cache recorder if it was created in memWriteback() + // to checkpoint the current cache state. + if (m_cache_recorder) { + delete m_cache_recorder; + m_cache_recorder = NULL; + } +} + +void +RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, + uint64_t &uncompressed_trace_size) +{ + // Read the trace file + gzFile compressedTrace; + + // trace file + int fd = open(filename.c_str(), O_RDONLY); + if (fd < 0) { + perror("open"); + fatal("Unable to open trace file %s", filename); + } + + compressedTrace = gzdopen(fd, "rb"); + if (compressedTrace == NULL) { + fatal("Insufficient memory to allocate compression state for %s\n", + filename); + } + + raw_data = new uint8_t[uncompressed_trace_size]; + if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < + uncompressed_trace_size) { + fatal("Unable to read complete trace from file %s\n", filename); + } + + if (gzclose(compressedTrace)) { + fatal("Failed to close cache trace file '%s'\n", filename); + } +} + +void +RubySystem::unserialize(CheckpointIn &cp) +{ + uint8_t *uncompressed_trace = NULL; + + // This value should be set to the checkpoint-system's block-size. + // Optional, as checkpoints without it can be run if the + // checkpoint-system's block-size == current block-size. + uint64_t block_size_bytes = getBlockSizeBytes(); + UNSERIALIZE_OPT_SCALAR(block_size_bytes); + + string cache_trace_file; + uint64_t cache_trace_size = 0; + + UNSERIALIZE_SCALAR(cache_trace_file); + UNSERIALIZE_SCALAR(cache_trace_size); + cache_trace_file = cp.cptDir + "/" + cache_trace_file; + + readCompressedTrace(cache_trace_file, uncompressed_trace, + cache_trace_size); + m_warmup_enabled = true; + m_systems_to_warmup++; + + // Create the cache recorder that will hang around until startup. + makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes); +} + +void +RubySystem::startup() +{ + + // Ruby restores state from a checkpoint by resetting the clock to 0 and + // playing the requests that can possibly re-generate the cache state. + // The clock value is set to the actual checkpointed value once all the + // requests have been executed. + // + // This way of restoring state is pretty finicky. For example, if a + // Ruby component reads time before the state has been restored, it would + // cache this value and hence its clock would not be reset to 0, when + // Ruby resets the global clock. This can potentially result in a + // deadlock. + // + // The solution is that no Ruby component should read time before the + // simulation starts. And then one also needs to hope that the time + // Ruby finishes restoring the state is less than the time when the + // state was checkpointed. + + if (m_warmup_enabled) { + DPRINTF(RubyCacheTrace, "Starting ruby cache warmup\n"); + // save the current tick value + Tick curtick_original = curTick(); + // save the event queue head + Event* eventq_head = eventq->replaceHead(NULL); + // set curTick to 0 and reset Ruby System's clock + setCurTick(0); + resetClock(); + + // Schedule an event to start cache warmup + enqueueRubyEvent(curTick()); + simulate(); + + delete m_cache_recorder; + m_cache_recorder = NULL; + m_systems_to_warmup--; + if (m_systems_to_warmup == 0) { + m_warmup_enabled = false; + } + + // Restore eventq head + eventq_head = eventq->replaceHead(eventq_head); + // Restore curTick and Ruby System's clock + setCurTick(curtick_original); + resetClock(); + } + + resetStats(); +} + +void +RubySystem::RubyEvent::process() +{ + if (RubySystem::getWarmupEnabled()) { + m_ruby_system->m_cache_recorder->enqueueNextFetchRequest(); + } else if (RubySystem::getCooldownEnabled()) { + m_ruby_system->m_cache_recorder->enqueueNextFlushRequest(); + } +} + +void +RubySystem::resetStats() +{ + m_start_cycle = curCycle(); +} + +bool +RubySystem::functionalRead(PacketPtr pkt) +{ + Addr address(pkt->getAddr()); + Addr line_address = makeLineAddress(address); + + AccessPermission access_perm = AccessPermission_NotPresent; + int num_controllers = m_abs_cntrl_vec.size(); + + DPRINTF(RubySystem, "Functional Read request for %s\n", address); + + unsigned int num_ro = 0; + unsigned int num_rw = 0; + unsigned int num_busy = 0; + unsigned int num_backing_store = 0; + unsigned int num_invalid = 0; + + // In this loop we count the number of controllers that have the given + // address in read only, read write and busy states. + for (unsigned int i = 0; i < num_controllers; ++i) { + access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); + if (access_perm == AccessPermission_Read_Only) + num_ro++; + else if (access_perm == AccessPermission_Read_Write) + num_rw++; + else if (access_perm == AccessPermission_Busy) + num_busy++; + else if (access_perm == AccessPermission_Backing_Store) + // See RubySlicc_Exports.sm for details, but Backing_Store is meant + // to represent blocks in memory *for Broadcast/Snooping protocols*, + // where memory has no idea whether it has an exclusive copy of data + // or not. + num_backing_store++; + else if (access_perm == AccessPermission_Invalid || + access_perm == AccessPermission_NotPresent) + num_invalid++; + } + assert(num_rw <= 1); + + // This if case is meant to capture what happens in a Broadcast/Snoop + // protocol where the block does not exist in the cache hierarchy. You + // only want to read from the Backing_Store memory if there is no copy in + // the cache hierarchy, otherwise you want to try to read the RO or RW + // copies existing in the cache hierarchy (covered by the else statement). + // The reason is because the Backing_Store memory could easily be stale, if + // there are copies floating around the cache hierarchy, so you want to read + // it only if it's not in the cache hierarchy at all. + if (num_invalid == (num_controllers - 1) && num_backing_store == 1) { + DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); + for (unsigned int i = 0; i < num_controllers; ++i) { + access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); + if (access_perm == AccessPermission_Backing_Store) { + m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); + return true; + } + } + } else if (num_ro > 0 || num_rw == 1) { + // In Broadcast/Snoop protocols, this covers if you know the block + // exists somewhere in the caching hierarchy, then you want to read any + // valid RO or RW block. In directory protocols, same thing, you want + // to read any valid readable copy of the block. + DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", + num_busy, num_ro, num_rw); + // In this loop, we try to figure which controller has a read only or + // a read write copy of the given address. Any valid copy would suffice + // for a functional read. + for (unsigned int i = 0;i < num_controllers;++i) { + access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); + if (access_perm == AccessPermission_Read_Only || + access_perm == AccessPermission_Read_Write) { + m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); + return true; + } + } + } + + return false; +} + +// The function searches through all the buffers that exist in different +// cache, directory and memory controllers, and in the network components +// and writes the data portion of those that hold the address specified +// in the packet. +bool +RubySystem::functionalWrite(PacketPtr pkt) +{ + Addr addr(pkt->getAddr()); + Addr line_addr = makeLineAddress(addr); + AccessPermission access_perm = AccessPermission_NotPresent; + int num_controllers = m_abs_cntrl_vec.size(); + + DPRINTF(RubySystem, "Functional Write request for %s\n", addr); + + uint32_t M5_VAR_USED num_functional_writes = 0; + + for (unsigned int i = 0; i < num_controllers;++i) { + num_functional_writes += + m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt); + + access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); + if (access_perm != AccessPermission_Invalid && + access_perm != AccessPermission_NotPresent) { + num_functional_writes += + m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt); + } + } + + num_functional_writes += m_network->functionalWrite(pkt); + DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); + + return true; +} + +#ifdef CHECK_COHERENCE +// This code will check for cases if the given cache block is exclusive in +// one node and shared in another-- a coherence violation +// +// To use, the SLICC specification must call sequencer.checkCoherence(address) +// when the controller changes to a state with new permissions. Do this +// in setState. The SLICC spec must also define methods "isBlockShared" +// and "isBlockExclusive" that are specific to that protocol +// +void +RubySystem::checkGlobalCoherenceInvariant(const Address& addr) +{ +#if 0 + NodeID exclusive = -1; + bool sharedDetected = false; + NodeID lastShared = -1; + + for (int i = 0; i < m_chip_vector.size(); i++) { + if (m_chip_vector[i]->isBlockExclusive(addr)) { + if (exclusive != -1) { + // coherence violation + WARN_EXPR(exclusive); + WARN_EXPR(m_chip_vector[i]->getID()); + WARN_EXPR(addr); + WARN_EXPR(getTime()); + ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); + } else if (sharedDetected) { + WARN_EXPR(lastShared); + WARN_EXPR(m_chip_vector[i]->getID()); + WARN_EXPR(addr); + WARN_EXPR(getTime()); + ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); + } else { + exclusive = m_chip_vector[i]->getID(); + } + } else if (m_chip_vector[i]->isBlockShared(addr)) { + sharedDetected = true; + lastShared = m_chip_vector[i]->getID(); + + if (exclusive != -1) { + WARN_EXPR(lastShared); + WARN_EXPR(exclusive); + WARN_EXPR(addr); + WARN_EXPR(getTime()); + ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); + } + } + } +#endif +} +#endif + +RubySystem * +RubySystemParams::create() +{ + return new RubySystem(this); +} |