diff options
Diffstat (limited to 'src/mem/ruby/config')
-rw-r--r-- | src/mem/ruby/config/RubyConfig.cc | 193 | ||||
-rw-r--r-- | src/mem/ruby/config/RubyConfig.hh | 157 | ||||
-rw-r--r-- | src/mem/ruby/config/config.include | 323 | ||||
-rw-r--r-- | src/mem/ruby/config/rubyconfig.defaults | 466 | ||||
-rw-r--r-- | src/mem/ruby/config/tester.defaults | 60 |
5 files changed, 1199 insertions, 0 deletions
diff --git a/src/mem/ruby/config/RubyConfig.cc b/src/mem/ruby/config/RubyConfig.cc new file mode 100644 index 000000000..fe4e3be8f --- /dev/null +++ b/src/mem/ruby/config/RubyConfig.cc @@ -0,0 +1,193 @@ + +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * RubyConfig.C + * + * Description: See RubyConfig.h + * + * $Id$ + * + */ + +#include "RubyConfig.hh" +#include "protocol_name.hh" +#include "util.hh" +#include "interface.hh" +#include "Protocol.hh" + +#define CHECK_POWER_OF_2(N) { if (!is_power_of_2(N)) { ERROR_MSG(#N " must be a power of 2."); }} +#define CHECK_ZERO(N) { if (N != 0) { ERROR_MSG(#N " must be zero at initialization."); }} +#define CHECK_NON_ZERO(N) { if (N == 0) { ERROR_MSG(#N " must be non-zero."); }} + + +void RubyConfig::init() +{ + // MemoryControl: + CHECK_NON_ZERO(MEM_BUS_CYCLE_MULTIPLIER); + CHECK_NON_ZERO(BANKS_PER_RANK); + CHECK_NON_ZERO(RANKS_PER_DIMM); + CHECK_NON_ZERO(DIMMS_PER_CHANNEL); + CHECK_NON_ZERO(BANK_QUEUE_SIZE); + CHECK_NON_ZERO(BANK_BUSY_TIME); + CHECK_NON_ZERO(MEM_CTL_LATENCY); + CHECK_NON_ZERO(REFRESH_PERIOD); + CHECK_NON_ZERO(BASIC_BUS_BUSY_TIME); + + CHECK_POWER_OF_2(BANKS_PER_RANK); + CHECK_POWER_OF_2(RANKS_PER_DIMM); + CHECK_POWER_OF_2(DIMMS_PER_CHANNEL); + + CHECK_NON_ZERO(g_MEMORY_SIZE_BYTES); + CHECK_NON_ZERO(g_DATA_BLOCK_BYTES); + CHECK_NON_ZERO(g_PAGE_SIZE_BYTES); + CHECK_NON_ZERO(g_NUM_PROCESSORS); + CHECK_NON_ZERO(g_PROCS_PER_CHIP); + if(g_NUM_SMT_THREADS == 0){ //defaults to single-threaded + g_NUM_SMT_THREADS = 1; + } + if (g_NUM_L2_BANKS == 0) { // defaults to number of ruby nodes + g_NUM_L2_BANKS = g_NUM_PROCESSORS; + } + if (g_NUM_MEMORIES == 0) { // defaults to number of ruby nodes + g_NUM_MEMORIES = g_NUM_PROCESSORS; + } + + CHECK_ZERO(g_MEMORY_SIZE_BITS); + CHECK_ZERO(g_DATA_BLOCK_BITS); + CHECK_ZERO(g_PAGE_SIZE_BITS); + CHECK_ZERO(g_NUM_PROCESSORS_BITS); + CHECK_ZERO(g_NUM_CHIP_BITS); + CHECK_ZERO(g_NUM_L2_BANKS_BITS); + CHECK_ZERO(g_NUM_MEMORIES_BITS); + CHECK_ZERO(g_PROCS_PER_CHIP_BITS); + CHECK_ZERO(g_NUM_L2_BANKS_PER_CHIP); + CHECK_ZERO(g_NUM_L2_BANKS_PER_CHIP_BITS); + CHECK_ZERO(g_NUM_MEMORIES_BITS); + CHECK_ZERO(g_MEMORY_MODULE_BLOCKS); + CHECK_ZERO(g_MEMORY_MODULE_BITS); + CHECK_ZERO(g_NUM_MEMORIES_PER_CHIP); + + 
CHECK_POWER_OF_2(g_MEMORY_SIZE_BYTES); + CHECK_POWER_OF_2(g_DATA_BLOCK_BYTES); + CHECK_POWER_OF_2(g_NUM_PROCESSORS); + CHECK_POWER_OF_2(g_NUM_L2_BANKS); + CHECK_POWER_OF_2(g_NUM_MEMORIES); + CHECK_POWER_OF_2(g_PROCS_PER_CHIP); + + ASSERT(g_NUM_PROCESSORS >= g_PROCS_PER_CHIP); // obviously can't have less processors than procs/chip + g_NUM_CHIPS = g_NUM_PROCESSORS/g_PROCS_PER_CHIP; + ASSERT(g_NUM_L2_BANKS >= g_NUM_CHIPS); // cannot have a single L2cache across multiple chips + + g_NUM_L2_BANKS_PER_CHIP = g_NUM_L2_BANKS/g_NUM_CHIPS; + + ASSERT(L2_CACHE_NUM_SETS_BITS > log_int(g_NUM_L2_BANKS_PER_CHIP)); // cannot have less than one set per bank + L2_CACHE_NUM_SETS_BITS = L2_CACHE_NUM_SETS_BITS - log_int(g_NUM_L2_BANKS_PER_CHIP); + + if (g_NUM_CHIPS > g_NUM_MEMORIES) { + g_NUM_MEMORIES_PER_CHIP = 1; // some chips have a memory, others don't + } else { + g_NUM_MEMORIES_PER_CHIP = g_NUM_MEMORIES/g_NUM_CHIPS; + } + + g_NUM_CHIP_BITS = log_int(g_NUM_CHIPS); + g_MEMORY_SIZE_BITS = log_int(g_MEMORY_SIZE_BYTES); + g_DATA_BLOCK_BITS = log_int(g_DATA_BLOCK_BYTES); + g_PAGE_SIZE_BITS = log_int(g_PAGE_SIZE_BYTES); + g_NUM_PROCESSORS_BITS = log_int(g_NUM_PROCESSORS); + g_NUM_L2_BANKS_BITS = log_int(g_NUM_L2_BANKS); + g_NUM_L2_BANKS_PER_CHIP_BITS = log_int(g_NUM_L2_BANKS_PER_CHIP); + g_NUM_MEMORIES_BITS = log_int(g_NUM_MEMORIES); + g_PROCS_PER_CHIP_BITS = log_int(g_PROCS_PER_CHIP); + + g_MEMORY_MODULE_BITS = g_MEMORY_SIZE_BITS - g_DATA_BLOCK_BITS - g_NUM_MEMORIES_BITS; + g_MEMORY_MODULE_BLOCKS = (int64(1) << g_MEMORY_MODULE_BITS); + + if ((!Protocol::m_CMP) && (g_PROCS_PER_CHIP > 1)) { + ERROR_MSG("Non-CMP protocol should set g_PROCS_PER_CHIP to 1"); + } + + // Randomize the execution + srandom(g_RANDOM_SEED); +} + +int RubyConfig::L1CacheNumToL2Base(NodeID L1CacheNum) +{ + return L1CacheNum/g_PROCS_PER_CHIP; +} + +static void print_parameters(ostream& out) +{ + +#define PARAM(NAME) { out << #NAME << ": " << NAME << endl; } +#define PARAM_UINT(NAME) { out << #NAME << ": " << NAME 
<< endl; } +#define PARAM_ULONG(NAME) { out << #NAME << ": " << NAME << endl; } +#define PARAM_BOOL(NAME) { out << #NAME << ": " << bool_to_string(NAME) << endl; } +#define PARAM_DOUBLE(NAME) { out << #NAME << ": " << NAME << endl; } +#define PARAM_STRING(NAME) { assert(NAME != NULL); out << #NAME << ": " << string(NAME) << endl; } +#define PARAM_ARRAY(PTYPE, NAME, ARRAY_SIZE) \ + { \ + out << #NAME << ": ("; \ + for (int i = 0; i < ARRAY_SIZE; i++) { \ + if (i != 0) { \ + out << ", "; \ + } \ + out << NAME[i]; \ + } \ + out << ")" << endl; \ + } \ + + +#include CONFIG_VAR_FILENAME +#undef PARAM +#undef PARAM_UINT +#undef PARAM_ULONG +#undef PARAM_BOOL +#undef PARAM_DOUBLE +#undef PARAM_STRING +#undef PARAM_ARRAY +} + +void RubyConfig::printConfiguration(ostream& out) { + out << "Ruby Configuration" << endl; + out << "------------------" << endl; + + out << "protocol: " << CURRENT_PROTOCOL << endl; + SIMICS_print_version(out); + out << "compiled_at: " << __TIME__ << ", " << __DATE__ << endl; + out << "RUBY_DEBUG: " << bool_to_string(RUBY_DEBUG) << endl; + + char buffer[100]; + gethostname(buffer, 50); + out << "hostname: " << buffer << endl; + + print_parameters(out); +} + + diff --git a/src/mem/ruby/config/RubyConfig.hh b/src/mem/ruby/config/RubyConfig.hh new file mode 100644 index 000000000..b2cc745bc --- /dev/null +++ b/src/mem/ruby/config/RubyConfig.hh @@ -0,0 +1,157 @@ + +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * RubyConfig.h + * + * Description: This class has only static members and class methods, + * and thus should never need to be instantiated. 
+ * + * $Id$ + * + */ + +#ifndef RUBYCONFIG_H +#define RUBYCONFIG_H + +#include "Global.hh" +#define CONFIG_VAR_FILENAME "config.include" +#include "vardecl.hh" +#include "NodeID.hh" + +#define MEMORY_LATENCY RubyConfig::memoryResponseLatency() +#define ABORT_DELAY m_chip_ptr->getTransactionManager(m_version)->getAbortDelay() + +// Set paramterization +/* + * This defines the number of longs (32-bits on 32 bit machines, + * 64-bit on 64-bit AMD machines) to use to hold the set... + * the default is 4, allowing 128 or 256 different members + * of the set. + * + * This should never need to be changed for correctness reasons, + * though increasing it will increase performance for larger + * set sizes at the cost of a (much) larger memory footprint + * + */ +const int NUMBER_WORDS_PER_SET = 4; + +class RubyConfig { +public: + + // CACHE BLOCK CONFIG VARIBLES + static int dataBlockBits() { return g_DATA_BLOCK_BITS; } + static int dataBlockBytes() { return g_DATA_BLOCK_BYTES; } + + // SUPPORTED PHYSICAL MEMORY CONFIG VARIABLES + static int pageSizeBits() { return g_PAGE_SIZE_BITS; } + static int pageSizeBytes() { return g_PAGE_SIZE_BYTES; } + static int memorySizeBits() { return g_MEMORY_SIZE_BITS; } + static int64 memorySizeBytes() { return g_MEMORY_SIZE_BYTES; } + static int memoryModuleBits() { return g_MEMORY_MODULE_BITS; } + static int64 memoryModuleBlocks() { return g_MEMORY_MODULE_BLOCKS; } + + // returns number of SMT threads per physical processor + static int numberofSMTThreads() { return g_NUM_SMT_THREADS; } + // defines the number of simics processors (power of 2) + static int numberOfProcessors() { return g_NUM_PROCESSORS; } + static int procsPerChipBits() { return g_PROCS_PER_CHIP_BITS; } + static int numberOfProcsPerChip() { return g_PROCS_PER_CHIP; } + static int numberOfChips() { return g_NUM_CHIPS; } + + // MACHINE INSTANIATION CONFIG VARIABLES + // ------------------------------------- + // L1 CACHE MACHINES + // defines the number of L1banks - 
idependent of ruby chips (power of 2) + // NOTE - no protocols currently supports L1s != processors, just a placeholder + static int L1CacheBits() { return g_NUM_PROCESSORS_BITS; } + static int numberOfL1Cache() { return g_NUM_PROCESSORS; } + static int L1CachePerChipBits() { return procsPerChipBits() ; } // L1s != processors not currently supported + static int numberOfL1CachePerChip() { return numberOfProcsPerChip(); } // L1s != processors not currently supported + static int numberOfL1CachePerChip(NodeID myNodeID) { return numberOfL1CachePerChip(); } + static int L1CacheTransitionsPerCycle() { return L1CACHE_TRANSITIONS_PER_RUBY_CYCLE; } + + // L2 CACHE MACHINES + // defines the number of L2banks/L2Caches - idependent of ruby chips (power of 2) + static int L2CacheBits() { return g_NUM_L2_BANKS_BITS; } + static int numberOfL2Cache() { return g_NUM_L2_BANKS; } + static int L1CacheNumToL2Base(NodeID L1RubyNodeID); + static int L2CachePerChipBits() { return g_NUM_L2_BANKS_PER_CHIP_BITS; } + static int numberOfL2CachePerChip() { return g_NUM_L2_BANKS_PER_CHIP; } + static int numberOfL2CachePerChip(NodeID myNodeID) { return numberOfL2CachePerChip(); } + static int L2CacheTransitionsPerCycle() { return L2CACHE_TRANSITIONS_PER_RUBY_CYCLE; } + + // DIRECTORY/MEMORY MACHINES + // defines the number of ruby memories - idependent of ruby chips (power of 2) + static int memoryBits() { return g_NUM_MEMORIES_BITS; } + static int numberOfDirectory() { return numberOfMemories(); } + static int numberOfMemories() { return g_NUM_MEMORIES; } + static int numberOfDirectoryPerChip() { return g_NUM_MEMORIES_PER_CHIP; } + static int numberOfDirectoryPerChip(NodeID myNodeID) { return g_NUM_MEMORIES_PER_CHIP; } + static int DirectoryTransitionsPerCycle() { return DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE; } + + // PERSISTENT ARBITER MACHINES + static int numberOfPersistentArbiter() { return numberOfMemories(); } + static int numberOfPersistentArbiterPerChip() {return 
numberOfDirectoryPerChip(); } + static int numberOfPersistentArbiterPerChip(NodeID myNodeID) {return numberOfDirectoryPerChip(myNodeID); } + static int PersistentArbiterTransitionsPerCycle() { return L2CACHE_TRANSITIONS_PER_RUBY_CYCLE; } + + // ---- END MACHINE SPECIFIC VARIABLES ---- + + // VARIABLE MEMORY RESPONSE LATENCY + // *** NOTE *** This is where variation is added to the simulation + // see Alameldeen et al. HPCA 2003 for further details + static int memoryResponseLatency() { return MEMORY_RESPONSE_LATENCY_MINUS_2+(random() % 5); } + + static void init(); + static void printConfiguration(ostream& out); + + // Memory Controller + static int memBusCycleMultiplier () { return MEM_BUS_CYCLE_MULTIPLIER; } + static int banksPerRank () { return BANKS_PER_RANK; } + static int ranksPerDimm () { return RANKS_PER_DIMM; } + static int dimmsPerChannel () { return DIMMS_PER_CHANNEL; } + static int bankBit0 () { return BANK_BIT_0; } + static int rankBit0 () { return RANK_BIT_0; } + static int dimmBit0 () { return DIMM_BIT_0; } + static int bankQueueSize () { return BANK_QUEUE_SIZE; } + static int bankBusyTime () { return BANK_BUSY_TIME; } + static int rankRankDelay () { return RANK_RANK_DELAY; } + static int readWriteDelay () { return READ_WRITE_DELAY; } + static int basicBusBusyTime () { return BASIC_BUS_BUSY_TIME; } + static int memCtlLatency () { return MEM_CTL_LATENCY; } + static int refreshPeriod () { return REFRESH_PERIOD; } + static int tFaw () { return TFAW; } + static int memRandomArbitrate () { return MEM_RANDOM_ARBITRATE; } + static int memFixedDelay () { return MEM_FIXED_DELAY; } + +private: +}; + +#endif //RUBYCONFIG_H diff --git a/src/mem/ruby/config/config.include b/src/mem/ruby/config/config.include new file mode 100644 index 000000000..f853fb72b --- /dev/null +++ b/src/mem/ruby/config/config.include @@ -0,0 +1,323 @@ +// +// This file has been modified by Kevin Moore and Dan Nussbaum of the +// Scalable Systems Research Group at Sun Microsystems 
Laboratories +// (http://research.sun.com/scalable/) to support the Adaptive +// Transactional Memory Test Platform (ATMTP). For information about +// ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/. +// +// Please send email to atmtp-interest@sun.com with feedback, questions, or +// to request future announcements about ATMTP. +// +// ---------------------------------------------------------------------- +// +// File modification date: 2008-02-23 +// +// ---------------------------------------------------------------------- +// +// ATMTP is distributed as part of the GEMS software toolset and is +// available for use and modification under the terms of version 2 of the +// GNU General Public License. The GNU General Public License is contained +// in the file $GEMS/LICENSE. +// +// Multifacet GEMS is free software; you can redistribute it and/or modify +// it under the terms of version 2 of the GNU General Public License as +// published by the Free Software Foundation. +// +// Multifacet GEMS is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with the Multifacet GEMS; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA +// +// ---------------------------------------------------------------------- +// + +// see rubyconfig.defaults for some explanations + +PARAM( g_RANDOM_SEED ); + +// Maximum number of cycles a request can be outstanding before the +// Sequencer or StoreBuffer declares we're in deadlock/livelock +PARAM( g_DEADLOCK_THRESHOLD ); +PARAM_BOOL( RANDOMIZATION ); +PARAM_BOOL( g_SYNTHETIC_DRIVER ); +PARAM_BOOL( g_DETERMINISTIC_DRIVER ); + +// FOR MOESI_CMP_token +PARAM_BOOL( g_FILTERING_ENABLED ); +PARAM_BOOL( g_DISTRIBUTED_PERSISTENT_ENABLED ); +PARAM_BOOL( g_DYNAMIC_TIMEOUT_ENABLED ); +PARAM( g_RETRY_THRESHOLD ); +PARAM( g_FIXED_TIMEOUT_LATENCY ); + +PARAM( g_trace_warmup_length ); +PARAM_DOUBLE( g_bash_bandwidth_adaptive_threshold ); + +PARAM( g_tester_length ); +PARAM( g_synthetic_locks ); +PARAM( g_deterministic_addrs ); +// Specified Generator: See SpecifiedGeneratorType in external.sm for valid values +PARAM_STRING( g_SpecifiedGenerator ); +PARAM( g_callback_counter ); +PARAM( g_NUM_COMPLETIONS_BEFORE_PASS ); + +PARAM( g_NUM_SMT_THREADS ); + +PARAM( g_think_time ); +PARAM( g_hold_time ); +PARAM( g_wait_time ); + +// For debugging purposes, one can enable a trace of all the protocol +// state machine changes. Unfortunately, the code to generate the +// trace is protocol specific. To enable the code for some of the +// standard protocols, +// 1. change "PROTOCOL_DEBUG_TRACE = true" +// 2. enable debug in Makefile +// 3. 
use the "--start 1" command line parameter or +// "g_debug_ptr->setDebugTime(1)" to set the +// debug begin time +// +// this used to be in ruby/common/Global.h + +PARAM_BOOL( PROTOCOL_DEBUG_TRACE ); +// a string for filtering debugging output (for all g_debug vars see Debug.h) +PARAM_STRING( DEBUG_FILTER_STRING ); +// filters debugging messages based on priority (low, med, high) +PARAM_STRING( DEBUG_VERBOSITY_STRING ); +// filters debugging messages based on a ruby time +PARAM_ULONG( DEBUG_START_TIME ); +// sends debugging messages to an output file +PARAM_STRING( DEBUG_OUTPUT_FILENAME ); + +// defines relative (integer) clock multipliers between ruby, opal, and simics +PARAM( SIMICS_RUBY_MULTIPLIER ); +PARAM( OPAL_RUBY_MULTIPLIER ); + +PARAM_BOOL( TRANSACTION_TRACE_ENABLED ); +PARAM_BOOL( USER_MODE_DATA_ONLY ); +PARAM_BOOL( PROFILE_HOT_LINES ); + +// PROFILE_ALL_INSTRUCTIONS is used if you want Ruby to profile all instructions executed +// The following need to be true for this to work correctly: +// 1. Disable istc and dstc for this simulation run +// 2. Add the following line to the object "sim" in the checkpoint you run from: +// instruction_profile_line_size: 4 +// This is used to have simics report back all instruction requests + +// For more details on how to find out how to interpret the output physical instruction +// address, please read the document in the simics-howto directory +PARAM_BOOL( PROFILE_ALL_INSTRUCTIONS ); + +// Set the following variable to true if you want a complete trace of +// PCs (physical address of program counters, with executing processor IDs) +// to be printed to stdout. Make sure to direct the simics output to a file. +// Otherwise, the run will take a really long time! 
+// A long run may write a file that can exceed the OS limit on file length +PARAM_BOOL( PRINT_INSTRUCTION_TRACE ); +PARAM( g_DEBUG_CYCLE ); + +// Don't allow any datablocks to enter the STC +PARAM_BOOL( BLOCK_STC ); + +// Make the entire memory system perfect +PARAM_BOOL( PERFECT_MEMORY_SYSTEM ); +PARAM( PERFECT_MEMORY_SYSTEM_LATENCY ); + +PARAM_BOOL( DATA_BLOCK ); // Define NO_DATA_BLOCK to make the DataBlock take zero space + +PARAM_BOOL( REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH ); + +// ********************************************* +// CACHE & MEMORY PARAMETERS +// ********************************************* + +PARAM_BOOL( g_SIMICS ); + +PARAM( L1_CACHE_ASSOC ); +PARAM( L1_CACHE_NUM_SETS_BITS ); +PARAM( L2_CACHE_ASSOC ); +PARAM( L2_CACHE_NUM_SETS_BITS ); + +PARAM_ULONG( g_MEMORY_SIZE_BYTES ); +PARAM( g_DATA_BLOCK_BYTES ); +// The following page size parameter is used by the stride prefetcher +PARAM( g_PAGE_SIZE_BYTES ); +PARAM_STRING( g_REPLACEMENT_POLICY ); + +PARAM( g_NUM_PROCESSORS ); +PARAM( g_NUM_L2_BANKS ); +PARAM( g_NUM_MEMORIES ); +PARAM( g_PROCS_PER_CHIP ); + +// The following group of parameters are calculated. They must +// _always_ be left at zero. 
+PARAM( g_NUM_CHIPS ); +PARAM( g_NUM_CHIP_BITS ); +PARAM( g_MEMORY_SIZE_BITS ); +PARAM( g_DATA_BLOCK_BITS ); +PARAM( g_PAGE_SIZE_BITS ); +PARAM( g_NUM_PROCESSORS_BITS ); +PARAM( g_PROCS_PER_CHIP_BITS ); +PARAM( g_NUM_L2_BANKS_BITS ); +PARAM( g_NUM_L2_BANKS_PER_CHIP_BITS ); +PARAM( g_NUM_L2_BANKS_PER_CHIP ); +PARAM( g_NUM_MEMORIES_BITS ); +PARAM( g_NUM_MEMORIES_PER_CHIP ); +PARAM( g_MEMORY_MODULE_BITS ); +PARAM_ULONG( g_MEMORY_MODULE_BLOCKS ); + +// determines the mapping between L2 banks and sets within L2 banks +PARAM_BOOL( MAP_L2BANKS_TO_LOWEST_BITS ); + +// TIMING PARAMETERS +PARAM( DIRECTORY_CACHE_LATENCY ); + +PARAM( NULL_LATENCY ); +PARAM( ISSUE_LATENCY ); +PARAM( CACHE_RESPONSE_LATENCY ); +PARAM( L2_RESPONSE_LATENCY ); +PARAM( L2_TAG_LATENCY ); +PARAM( L1_RESPONSE_LATENCY ); +PARAM( MEMORY_RESPONSE_LATENCY_MINUS_2 ); +PARAM( DIRECTORY_LATENCY ); +PARAM( NETWORK_LINK_LATENCY ); +PARAM( COPY_HEAD_LATENCY ); +PARAM( ON_CHIP_LINK_LATENCY ); +PARAM( RECYCLE_LATENCY ); +PARAM( L2_RECYCLE_LATENCY ); +PARAM( TIMER_LATENCY ); +PARAM( TBE_RESPONSE_LATENCY ); +PARAM_BOOL( PERIODIC_TIMER_WAKEUPS ); + +// constants used by TM protocols +PARAM_BOOL( PROFILE_EXCEPTIONS ); +PARAM_BOOL( PROFILE_XACT ); +PARAM_BOOL( PROFILE_NONXACT ); +PARAM_BOOL( XACT_DEBUG ); +PARAM ( XACT_DEBUG_LEVEL ); +PARAM_BOOL( XACT_MEMORY ); +PARAM_BOOL( XACT_ENABLE_TOURMALINE ); +PARAM( XACT_NUM_CURRENT ); +PARAM( XACT_LAST_UPDATE ); +PARAM_BOOL( XACT_ISOLATION_CHECK ); +PARAM_BOOL( PERFECT_FILTER ); +PARAM_STRING( READ_WRITE_FILTER ); +PARAM_BOOL( PERFECT_VIRTUAL_FILTER ); +PARAM_STRING( VIRTUAL_READ_WRITE_FILTER ); +PARAM_BOOL( PERFECT_SUMMARY_FILTER ); +PARAM_STRING( SUMMARY_READ_WRITE_FILTER ); +PARAM_BOOL( XACT_EAGER_CD ); +PARAM_BOOL( XACT_LAZY_VM ); +PARAM_STRING( XACT_CONFLICT_RES ); +PARAM_BOOL( XACT_VISUALIZER ); +PARAM( XACT_COMMIT_TOKEN_LATENCY ) ; +PARAM_BOOL( XACT_NO_BACKOFF ); +PARAM ( XACT_LOG_BUFFER_SIZE ); +PARAM ( XACT_STORE_PREDICTOR_HISTORY); +PARAM ( 
XACT_STORE_PREDICTOR_ENTRIES); +PARAM ( XACT_STORE_PREDICTOR_THRESHOLD); +PARAM ( XACT_FIRST_ACCESS_COST ); +PARAM ( XACT_FIRST_PAGE_ACCESS_COST ); +PARAM_BOOL( ENABLE_MAGIC_WAITING ); +PARAM_BOOL( ENABLE_WATCHPOINT ); +PARAM_BOOL( XACT_ENABLE_VIRTUALIZATION_LOGTM_SE ); + +// ATMTP +PARAM_BOOL( ATMTP_ENABLED ); +PARAM_BOOL( ATMTP_ABORT_ON_NON_XACT_INST ); +PARAM_BOOL( ATMTP_ALLOW_SAVE_RESTORE_IN_XACT ); +PARAM( ATMTP_XACT_MAX_STORES ); +PARAM( ATMTP_DEBUG_LEVEL ); + +// constants used by CMP protocols +PARAM( L1_REQUEST_LATENCY ); +PARAM( L2_REQUEST_LATENCY ); +PARAM_BOOL( SINGLE_ACCESS_L2_BANKS ); // hack to simulate multi-cycle L2 bank accesses + +// Ruby cycles between when a sequencer issues a miss it arrives at +// the L1 cache controller +PARAM( SEQUENCER_TO_CONTROLLER_LATENCY ); + +// Number of transitions each controller state machines can complete per cycle +PARAM( L1CACHE_TRANSITIONS_PER_RUBY_CYCLE ); +PARAM( L2CACHE_TRANSITIONS_PER_RUBY_CYCLE ); +PARAM( DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE ); + +// Maximum number of requests (including prefetches) outstanding from +// the sequencer (Note: this also include items buffered in the store +// buffer) +PARAM( g_SEQUENCER_OUTSTANDING_REQUESTS ); + +// Number of TBEs available for demand misses, prefetches, and replacements +PARAM( NUMBER_OF_TBES ); +PARAM( NUMBER_OF_L1_TBES ); +PARAM( NUMBER_OF_L2_TBES ); + +// NOTE: Finite buffering allows us to simulate a wormhole routed network +// with idealized flow control. All message buffers within the network (i.e. 
+// the switch's input and output buffers) are set to the size specified below +// by FINITE_BUFFER_SIZE +PARAM_BOOL( FINITE_BUFFERING ); +PARAM( FINITE_BUFFER_SIZE ); // Zero is unbounded buffers +// Number of requests buffered between the sequencer and the L1 controller +// This can be more accurately simulated in Opal, therefore it's set to an +// infinite number +// Only affects the simulation when FINITE_BUFFERING is enabled +PARAM( PROCESSOR_BUFFER_SIZE ); +// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to +// Controllers. Controls the number of requests issued by the L2 HW Prefetcher +PARAM( PROTOCOL_BUFFER_SIZE ); + +// Enable the TSO (Total Store Order) memory model +PARAM_BOOL( TSO ); // Note: This also disables the "write" STCs + +// NETWORK PARAMETERS + +// Network Topology: See TopologyType in external.sm for valid values +PARAM_STRING( g_NETWORK_TOPOLOGY ); + +// Cache Design specifies file prefix for topology +PARAM_STRING( g_CACHE_DESIGN ); + +PARAM( g_endpoint_bandwidth ); +PARAM_BOOL( g_adaptive_routing ); +PARAM( NUMBER_OF_VIRTUAL_NETWORKS ); +PARAM( FAN_OUT_DEGREE ); +PARAM_BOOL( g_PRINT_TOPOLOGY ); + +// transactional memory +PARAM( XACT_LENGTH ); +PARAM( XACT_SIZE ); +PARAM( ABORT_RETRY_TIME ); + +// Princeton Network (Garnet) +PARAM_BOOL( g_GARNET_NETWORK ); +PARAM_BOOL( g_DETAIL_NETWORK ); +PARAM_BOOL( g_NETWORK_TESTING ); +PARAM( g_FLIT_SIZE ); +PARAM( g_NUM_PIPE_STAGES ); +PARAM( g_VCS_PER_CLASS ); +PARAM( g_BUFFER_SIZE ); + +// MemoryControl: +PARAM( MEM_BUS_CYCLE_MULTIPLIER ); +PARAM( BANKS_PER_RANK ); +PARAM( RANKS_PER_DIMM ); +PARAM( DIMMS_PER_CHANNEL ); +PARAM( BANK_BIT_0 ); +PARAM( RANK_BIT_0 ); +PARAM( DIMM_BIT_0 ); +PARAM( BANK_QUEUE_SIZE ); +PARAM( BANK_BUSY_TIME ); +PARAM( RANK_RANK_DELAY ); +PARAM( READ_WRITE_DELAY ); +PARAM( BASIC_BUS_BUSY_TIME ); +PARAM( MEM_CTL_LATENCY ); +PARAM( REFRESH_PERIOD ); +PARAM( TFAW ); +PARAM( MEM_RANDOM_ARBITRATE ); +PARAM( MEM_FIXED_DELAY ); + diff --git 
a/src/mem/ruby/config/rubyconfig.defaults b/src/mem/ruby/config/rubyconfig.defaults new file mode 100644 index 000000000..3b86b4645 --- /dev/null +++ b/src/mem/ruby/config/rubyconfig.defaults @@ -0,0 +1,466 @@ +// +// This file has been modified by Kevin Moore and Dan Nussbaum of the +// Scalable Systems Research Group at Sun Microsystems Laboratories +// (http://research.sun.com/scalable/) to support the Adaptive +// Transactional Memory Test Platform (ATMTP). For information about +// ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/. +// +// Please send email to atmtp-interest@sun.com with feedback, questions, or +// to request future announcements about ATMTP. +// +// ---------------------------------------------------------------------- +// +// File modification date: 2008-02-23 +// +// ---------------------------------------------------------------------- +// +// ATMTP is distributed as part of the GEMS software toolset and is +// available for use and modification under the terms of version 2 of the +// GNU General Public License. The GNU General Public License is contained +// in the file $GEMS/LICENSE. +// +// Multifacet GEMS is free software; you can redistribute it and/or modify +// it under the terms of version 2 of the GNU General Public License as +// published by the Free Software Foundation. +// +// Multifacet GEMS is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with the Multifacet GEMS; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA +// +// ---------------------------------------------------------------------- +// + +g_RANDOM_SEED: 1 +g_SIMICS: true + +g_DEADLOCK_THRESHOLD: 500000 + +// determines how many Simics cycles advance for every Ruby cycle +// (does not apply when running Opal) +SIMICS_RUBY_MULTIPLIER: 4 + +// corresponding parameter when using Opal+Ruby+Simics +OPAL_RUBY_MULTIPLIER: 1 + + +// Ruby cycles between when a sequencer issues a request and it arrives at +// the L1 cache controller +// +// ** important ** this parameter determines the L2 hit latency when +// using the SMP protocols with a combined L1/L2 controller (-cache.sm) +// +SEQUENCER_TO_CONTROLLER_LATENCY: 4 + + +// When set to false, the L1 cache structures are probed for a hit in Sequencer.C +// If a request hits, it is *not* issued to the cache controller +// When set to true, all processor data requests issue to cache controller +// +// ** important ** this parameter must be set to false for proper L1/L2 hit timing +// for the SMP protocols with combined L1/L2 controllers (-cache.sm) +// +REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: false + + +// When running with Opal in SMT configurations, this indicates the number of threads per physical processor +g_NUM_SMT_THREADS: 1 + + +// Maximum number of requests (including SW prefetches) outstanding from +// the sequencer (Note: this also include items buffered in the store +// buffer) +g_SEQUENCER_OUTSTANDING_REQUESTS: 16 + + +PROTOCOL_DEBUG_TRACE: true +DEBUG_FILTER_STRING: none +DEBUG_VERBOSITY_STRING: none +DEBUG_START_TIME: 0 +DEBUG_OUTPUT_FILENAME: none + + +TRANSACTION_TRACE_ENABLED: false +USER_MODE_DATA_ONLY: false +PROFILE_HOT_LINES: false + +PROFILE_ALL_INSTRUCTIONS: false +PRINT_INSTRUCTION_TRACE: false +g_DEBUG_CYCLE: 0 +BLOCK_STC: false 
+PERFECT_MEMORY_SYSTEM: false +PERFECT_MEMORY_SYSTEM_LATENCY: 0 +DATA_BLOCK: false + + +// ********************************************* +// CACHE & MEMORY PARAMETERS +// ********************************************* + + +L1_CACHE_ASSOC: 4 +L1_CACHE_NUM_SETS_BITS: 8 +L2_CACHE_ASSOC: 4 +L2_CACHE_NUM_SETS_BITS: 16 + +// 32 bits = 4 GB address space +g_MEMORY_SIZE_BYTES: 4294967296 +g_DATA_BLOCK_BYTES: 64 +g_PAGE_SIZE_BYTES: 4096 +g_REPLACEMENT_POLICY: PSEDUO_LRU // currently, only other option is LRU + +g_PROCS_PER_CHIP: 1 + + +// set automatically +g_NUM_PROCESSORS: 0 +g_NUM_L2_BANKS: 0 +g_NUM_MEMORIES: 0 + +// The following group of parameters are calculated. They must +// _always_ be left at zero. +g_NUM_CHIPS: 0 +g_NUM_CHIP_BITS: 0 +g_MEMORY_SIZE_BITS: 0 +g_DATA_BLOCK_BITS: 0 +g_PAGE_SIZE_BITS: 0 +g_NUM_PROCESSORS_BITS: 0 +g_PROCS_PER_CHIP_BITS: 0 +g_NUM_L2_BANKS_BITS: 0 +g_NUM_L2_BANKS_PER_CHIP: 0 +g_NUM_L2_BANKS_PER_CHIP_BITS: 0 +g_NUM_MEMORIES_BITS: 0 +g_NUM_MEMORIES_PER_CHIP: 0 +g_MEMORY_MODULE_BITS: 0 +g_MEMORY_MODULE_BLOCKS: 0 + + +// For certain CMP protocols, determines whether the lowest bits of a block address +// are used to index to a L2 cache bank or into the sets of a +// single bank +// lowest highest +// true: g_DATA_BLOCK_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS | L2_CACHE_NUM_SETS_BITS +// false: g_DATA_BLOCK_BITS | L2_CACHE_NUM_SETS_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS +MAP_L2BANKS_TO_LOWEST_BITS: false + + + +// TIMING PARAMETERS -- many of these are protocol specific. 
See SLICC files +// to determine where they apply + +MEMORY_RESPONSE_LATENCY_MINUS_2: 158 // determines memory response latency +DIRECTORY_CACHE_LATENCY: 6 +NULL_LATENCY: 1 +ISSUE_LATENCY: 2 +CACHE_RESPONSE_LATENCY: 12 +L1_RESPONSE_LATENCY: 3 +L2_RESPONSE_LATENCY: 6 +L2_TAG_LATENCY: 6 +DIRECTORY_LATENCY: 80 +NETWORK_LINK_LATENCY: 1 +COPY_HEAD_LATENCY: 4 +ON_CHIP_LINK_LATENCY: 1 +RECYCLE_LATENCY: 10 +L2_RECYCLE_LATENCY: 5 +TIMER_LATENCY: 10000 +TBE_RESPONSE_LATENCY: 1 +PERIODIC_TIMER_WAKEUPS: true + + +// constants used by CMP protocols +// cache bank access times +L1_REQUEST_LATENCY: 2 +L2_REQUEST_LATENCY: 4 + + + + +// Number of transitions each controller state machines can complete per cycle +// i.e. the number of ports to each controller +// L1cache is the sum of the L1I and L1D cache ports +L1CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32 +// Note: if SINGLE_ACCESS_L2_BANKS is enabled, this will probably enforce a +// much greater constraint on the concurrency of a L2 cache bank +L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32 +DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 32 + + +// Number of TBEs available for demand misses, ALL prefetches, and replacements +// used by one-level protocols +NUMBER_OF_TBES: 128 +// two-level protocols +NUMBER_OF_L1_TBES: 32 +NUMBER_OF_L2_TBES: 32 + +// TSO is deprecated +TSO: false + + +// ** INTERCONECT PARAMETERS ** +// +g_PRINT_TOPOLOGY: true +g_NETWORK_TOPOLOGY: HIERARCHICAL_SWITCH +g_CACHE_DESIGN: NUCA // specifies file prefix for FILE_SPECIFIED topology +FAN_OUT_DEGREE: 4 // for HIERARCHICAL SWITCH topology + +g_adaptive_routing: true +NUMBER_OF_VIRTUAL_NETWORKS: 4 + +// bandwidth unit is 1/1000 byte per cycle. the following parameter is multiplied by +// topology specific link weights +g_endpoint_bandwidth: 10000 + + +// ** finite buffering parameters +// +// note: Finite buffering allows us to simulate a realistic virtual cut-through +// routed network with idealized flow control. 
This feature is NOT heavily tested +FINITE_BUFFERING: false +// All message buffers within the network (i.e. the switch's input and +// output buffers) are set to the size specified below by the FINITE_BUFFER_SIZE +FINITE_BUFFER_SIZE: 3 +// g_SEQUENCER_OUTSTANDING_REQUESTS (above) controls the number of demand requests +// issued by the sequencer. The PROCESSOR_BUFFER_SIZE controls the +// number of requests in the mandatory queue +// Only affects the simulation when FINITE_BUFFERING is enabled +PROCESSOR_BUFFER_SIZE: 10 +// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to +// Controllers. Controls the number of requests issued by the L2 HW Prefetcher +PROTOCOL_BUFFER_SIZE: 32 +// ** end finite buffering parameters + + +// (deprecated) +// Allows only a single access to a multi-cycle L2 bank. +// Ensures the cache array is only accessed once for every L2_REQUEST_LATENCY +// number of cycles. However the TBE table can be accessed in parallel. +SINGLE_ACCESS_L2_BANKS: true + + +// constants used by TM protocols +PROFILE_EXCEPTIONS: false +PROFILE_XACT: true +PROFILE_NONXACT: false +XACT_DEBUG: true +XACT_DEBUG_LEVEL: 1 +//XACT_MEMORY: true // set to true for TM protocols.
set it HERE for lazy systems to register the proper SIMICS interfaces +XACT_MEMORY: false +XACT_ENABLE_TOURMALINE: false // perfect memory system +XACT_NUM_CURRENT: 0 // must be 0 +XACT_LAST_UPDATE: 0 // must be 0 +XACT_ISOLATION_CHECK: false // Checks whether each memory access preserves transaction isolation +PERFECT_FILTER: true // If true, use perfect physical read/write filters +READ_WRITE_FILTER: Perfect_ +PERFECT_VIRTUAL_FILTER: true // If true, use perfect virtual read/write filters +VIRTUAL_READ_WRITE_FILTER: Perfect_ +PERFECT_SUMMARY_FILTER: true // If true, use perfect summary read/write filters +SUMMARY_READ_WRITE_FILTER: Perfect_ +XACT_EAGER_CD: true +XACT_LAZY_VM: false +XACT_CONFLICT_RES: BASE +XACT_COMMIT_TOKEN_LATENCY: 0 +XACT_VISUALIZER: false +XACT_NO_BACKOFF: false +XACT_LOG_BUFFER_SIZE: 0 +XACT_STORE_PREDICTOR_ENTRIES: 256 +XACT_STORE_PREDICTOR_HISTORY: 256 +XACT_STORE_PREDICTOR_THRESHOLD: 4 +XACT_FIRST_ACCESS_COST: 0 +XACT_FIRST_PAGE_ACCESS_COST: 0 +ENABLE_MAGIC_WAITING: false +ENABLE_WATCHPOINT: false +XACT_ENABLE_VIRTUALIZATION_LOGTM_SE: false +// g_NETWORK_TOPOLOGY: FILE_SPECIFIED +// NUMBER_OF_VIRTUAL_NETWORKS: 5 +// L2_REQUEST_LATENCY: 15 +// SEQUENCER_TO_CONTROLLER_LATENCY: 3 +// L2_RESPONSE_LATENCY: 20 +// L2_TAG_LATENCY: 6 +// MEMORY_RESPONSE_LATENCY_MINUS_2: 448 +// RECYCLE_LATENCY: 1 +// g_MEMORY_SIZE_BYTES: 268435456 +// REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: true + +// ATMTP +ATMTP_ENABLED: false +ATMTP_ABORT_ON_NON_XACT_INST: false +ATMTP_ALLOW_SAVE_RESTORE_IN_XACT: false +ATMTP_XACT_MAX_STORES: 32 +ATMTP_DEBUG_LEVEL: 0 + +// MOESI_CMP_token parameters (some might be deprecated) +g_FILTERING_ENABLED: false +g_DISTRIBUTED_PERSISTENT_ENABLED: true +g_RETRY_THRESHOLD: 1 +g_DYNAMIC_TIMEOUT_ENABLED: true +g_FIXED_TIMEOUT_LATENCY: 300 + + +// tester parameters (overridden by testerconfig.defaults) +// +// injects random message delays to excite protocol races +RANDOMIZATION: false +g_SYNTHETIC_DRIVER: false +g_DETERMINISTIC_DRIVER: false 
+g_trace_warmup_length: 1000000 +g_bash_bandwidth_adaptive_threshold: 0.75 + +g_tester_length: 0 +// # of synthetic locks == 16 * 128 +g_synthetic_locks: 2048 +g_deterministic_addrs: 1 +g_SpecifiedGenerator: DetermInvGenerator +g_callback_counter: 0 +g_NUM_COMPLETIONS_BEFORE_PASS: 0 +// parameters used by locking synthetic tester +g_think_time: 5 +g_hold_time: 5 +g_wait_time: 5 + +// Princeton Network (Garnet) +g_GARNET_NETWORK: false +g_DETAIL_NETWORK: false +g_NETWORK_TESTING: false +g_FLIT_SIZE: 16 +g_NUM_PIPE_STAGES: 4 +g_VCS_PER_CLASS: 4 +g_BUFFER_SIZE: 4 + +/////////////////////////////////////////////////////////////////////////////// +// +// MemoryControl: + +// Basic cycle time of the memory controller. This defines the period which is +// used as the memory channel clock period, the address bus bit time, and the +// memory controller cycle time. +// Assuming a 200 MHz memory channel (DDR-400, which has 400 bits/sec data), +// and a 2 GHz Ruby clock: +MEM_BUS_CYCLE_MULTIPLIER: 10 + +// How many internal banks in each DRAM chip: +BANKS_PER_RANK: 8 + +// How many sets of DRAM chips per DIMM. +RANKS_PER_DIMM: 2 + +// How many DIMMs per channel. (Currently the only thing that +// matters is the number of ranks per channel, i.e. the product +// of this parameter and RANKS_PER_DIMM. But if and when this is +// expanded to do FB-DIMMs, the distinction between the two +// will matter.) +DIMMS_PER_CHANNEL: 2 + +// Which bits to use to find the bank, rank, and DIMM numbers. +// You could choose to have the bank bits, rank bits, and DIMM bits +// in any order; here they are in that order. +// For these defaults, we assume this format for addresses: +// Offset within line: [5:0] +// Memory controller #: [7:6] +// Bank: [10:8] +// Rank: [11] +// DIMM: [12] +// Row addr / Col addr: [top:13] +// If you get these bits wrong, then some banks won't see any +// requests; you need to check for this in the .stats output. 
+BANK_BIT_0: 8 +RANK_BIT_0: 11 +DIMM_BIT_0: 12 + +// Maximum number of entries in each bank queue; set to whatever you want. +// If it is too small, you will see in the .stats file a lot of delay +// time spent in the common input queue. +BANK_QUEUE_SIZE: 12 + +// Bank cycle time (tRC) measured in memory cycles: +BANK_BUSY_TIME: 11 + +// This is how many memory address cycles to delay between reads to +// different ranks of DRAMs to allow for clock skew: +RANK_RANK_DELAY: 1 + +// This is how many memory address cycles to delay between a read +// and a write. This is based on two things: (1) the data bus is +// used one cycle earlier in the operation; (2) a round-trip wire +// delay from the controller to the DIMM that did the reading. +READ_WRITE_DELAY: 2 + +// Basic address and data bus occupancy. If you are assuming a +// 16-byte-wide data bus (pairs of DIMMs side-by-side), then +// the data bus occupancy matches the address bus occupancy at +// two cycles. But if the channel is only 8 bytes wide, you +// need to increase this bus occupancy time to 4 cycles. +BASIC_BUS_BUSY_TIME: 2 + +// Latency to return a read request or writeback acknowledgement. +// Measured in memory address cycles. +// This equals tRCD + CL + AL + (four bit times) +// + (round trip on channel) +// + (memory control internal delays) +// It's going to be an approximation, so pick what you like. +// Note: The fact that latency is a constant, and does not depend on two +// low-order address bits, implies that our memory controller either: +// (a) tells the DRAM to read the critical word first, and sends the +// critical word first back to the CPU, or (b) waits until it has +// seen all four bit times on the data wires before sending anything +// back. Either is plausible. If (a), remove the "four bit times" +// term from the calculation above. +MEM_CTL_LATENCY: 12 + +// refresh_period is the number of memory cycles between refresh +// of row x in bank n and refresh of row x+1 in bank n.
For DDR-400, +// this is typically 7.8 usec for commercial systems; after 8192 such +// refreshes, this will have refreshed the whole chip in 64 msec. If +// we have a 5 nsec memory clock, 7800 / 5 = 1560 cycles. The memory +// controller will divide this by the total number of banks, and kick +// off a refresh to *somebody* every time that amount is counted +// down to zero. (There will be some rounding error there, but it +// should have minimal effect.) +REFRESH_PERIOD: 1560 + +// tFAW is a DRAM chip parameter which restricts the number of +// activates that can be done within a certain window of time. +// The window is specified here in terms of number of memory +// controller cycles. At most four activates may be done during +// any such sliding window. If this number is set to be no more +// than 4 * BASIC_BUS_BUSY_TIME, it will have no effect. +// It is typical in real systems for tFAW to have no effect, but +// it may be useful in throttling power. Set to zero to ignore. +TFAW: 0 + +// By default, the memory controller uses round-robin to arbitrate +// between ready bank queues for use of the address bus. If you +// wish to add randomness to the system, set this parameter to +// one instead, and it will restart the round-robin pointer at a +// random bank number each cycle. If you want additional +// nondeterminism, set the parameter to some integer n >= 2, and +// it will in addition add a n% chance each cycle that a ready bank +// will be delayed an additional cycle. Note that if you are +// in MEM_FIXED_DELAY mode (see below), MEM_RANDOM_ARBITRATE=1 will +// have no effect, but MEM_RANDOM_ARBITRATE=2 or more will. +MEM_RANDOM_ARBITRATE: 0 + +// The following parameter, if nonzero, will disable the memory +// controller and instead give every request a fixed latency. The +// nonzero value specified here is measured in memory cycles and is +// just added to MEM_CTL_LATENCY. 
It will also show up in the stats +// file as a contributor to memory_delays_stalled_at_head_of_bank_queue. +MEM_FIXED_DELAY: 0 + +// If instead of DDR-400, you wanted DDR-800, the channel gets faster +// but the basic operation of the DRAM core is unchanged. +// Busy times appear to double just because they are measured +// in smaller clock cycles. The performance advantage comes because +// the bus busy times don't actually quite double. +// You would use something like these values: +// +// MEM_BUS_CYCLE_MULTIPLIER: 5 +// BANK_BUSY_TIME: 22 +// RANK_RANK_DELAY: 2 +// READ_WRITE_DELAY: 3 +// BASIC_BUS_BUSY_TIME: 3 +// MEM_CTL_LATENCY: 20 +// REFRESH_PERIOD: 3120 diff --git a/src/mem/ruby/config/tester.defaults b/src/mem/ruby/config/tester.defaults new file mode 100644 index 000000000..ea83a1443 --- /dev/null +++ b/src/mem/ruby/config/tester.defaults @@ -0,0 +1,60 @@ + +// +// This file contains tester specific changes to the rubyconfig.defaults +// parameter values. +// +// Please: - Add new variables only to rubyconfig.defaults file. +// - Change them here only when necessary. + +g_SIMICS: false +DATA_BLOCK: true +RANDOMIZATION: true +g_SYNTHETIC_DRIVER: true +g_DETERMINISTIC_DRIVER: false +g_DEADLOCK_THRESHOLD: 500000 +g_SpecifiedGenerator: DetermGETXGenerator + +PROTOCOL_DEBUG_TRACE: true + +// +// Generic cache parameters +// + +// Cache sizes are smaller for the random tester to increase the amount +// of false sharing. 
+L1_CACHE_ASSOC: 2 +L1_CACHE_NUM_SETS_BITS: 2 +L2_CACHE_ASSOC: 2 +L2_CACHE_NUM_SETS_BITS: 5 + +g_MEMORY_SIZE_BYTES: 1048576 + +// XACT MEMORY +XACT_LENGTH: 2000 +XACT_SIZE: 1000 +ABORT_RETRY_TIME: 400 +XACT_ISOLATION_CHECK: true +L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 1000 +DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 1000 +PERFECT_FILTER: true // If true, use perfect read/write filters +READ_WRITE_FILTER: Perfect_ + +//g_NETWORK_TOPOLOGY: FILE_SPECIFIED +RECYCLE_LATENCY: 1 +//NUMBER_OF_VIRTUAL_NETWORKS: 5 +//g_NUM_MEMORIES: 16 +L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 1000 +DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 1000 +//g_PROCS_PER_CHIP: 16 +//g_NUM_L2_BANKS: 16 +//g_endpoint_bandwidth: 10000 +//g_NUM_PROCESSORS: 16 +//g_NUM_SMT_THREADS: 1 +//g_GARNET_NETWORK: true +//g_DETAIL_NETWORK: true +//g_NETWORK_TESTING: false +//g_FLIT_SIZE: 32 +//g_NUM_PIPE_STAGES: 5 +//g_VCS_PER_CLASS: 2 +//g_BUFFER_SIZE: 4 + |