Diffstat (limited to 'src/mem/ruby/config/rubyconfig.defaults')
-rw-r--r--   src/mem/ruby/config/rubyconfig.defaults   466
1 file changed, 466 insertions, 0 deletions
diff --git a/src/mem/ruby/config/rubyconfig.defaults b/src/mem/ruby/config/rubyconfig.defaults
new file mode 100644
index 000000000..3b86b4645
--- /dev/null
+++ b/src/mem/ruby/config/rubyconfig.defaults
@@ -0,0 +1,466 @@
//
// This file has been modified by Kevin Moore and Dan Nussbaum of the
// Scalable Systems Research Group at Sun Microsystems Laboratories
// (http://research.sun.com/scalable/) to support the Adaptive
// Transactional Memory Test Platform (ATMTP). For information about
// ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/.
//
// Please send email to atmtp-interest@sun.com with feedback, questions, or
// to request future announcements about ATMTP.
//
// ----------------------------------------------------------------------
//
// File modification date: 2008-02-23
//
// ----------------------------------------------------------------------
//
// ATMTP is distributed as part of the GEMS software toolset and is
// available for use and modification under the terms of version 2 of the
// GNU General Public License. The GNU General Public License is contained
// in the file $GEMS/LICENSE.
//
// Multifacet GEMS is free software; you can redistribute it and/or modify
// it under the terms of version 2 of the GNU General Public License as
// published by the Free Software Foundation.
//
// Multifacet GEMS is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with the Multifacet GEMS; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
//
// ----------------------------------------------------------------------
//

g_RANDOM_SEED: 1
g_SIMICS: true

g_DEADLOCK_THRESHOLD: 500000

// determines how many Simics cycles advance for every Ruby cycle
// (does not apply when running Opal)
SIMICS_RUBY_MULTIPLIER: 4

// corresponding parameter when using Opal+Ruby+Simics
OPAL_RUBY_MULTIPLIER: 1


// Ruby cycles between when a sequencer issues a request and it arrives at
// the L1 cache controller
//
// ** important ** this parameter determines the L2 hit latency when
// using the SMP protocols with a combined L1/L2 controller (-cache.sm)
//
SEQUENCER_TO_CONTROLLER_LATENCY: 4


// When set to false, the L1 cache structures are probed for a hit in Sequencer.C.
// If a request hits, it is *not* issued to the cache controller.
// When set to true, all processor data requests issue to the cache controller.
//
// ** important ** this parameter must be set to false for proper L1/L2 hit timing
// for the SMP protocols with combined L1/L2 controllers (-cache.sm)
//
REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: false


// When running with Opal in SMT configurations, this indicates the number of
// threads per physical processor
g_NUM_SMT_THREADS: 1


// Maximum number of requests (including SW prefetches) outstanding from
// the sequencer (Note: this also includes items buffered in the store
// buffer)
g_SEQUENCER_OUTSTANDING_REQUESTS: 16
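
The file itself is a flat list of NAME: value settings with // comments, read when Ruby is configured. Purely as an illustration of the format (the real GEMS/Ruby parameter reader is part of the simulator's C++ code; read_ruby_defaults below is a hypothetical helper, not something shipped with GEMS), a few lines of Python are enough to load the defaults into a dictionary:

# Illustrative sketch only: load "NAME: value  // comment" lines into a dict.
# read_ruby_defaults is a hypothetical helper, not part of GEMS/Ruby itself.
def read_ruby_defaults(path):
    params = {}
    with open(path) as f:
        for raw in f:
            line = raw.split("//", 1)[0].strip()   # strip comments and whitespace
            if not line or ":" not in line:
                continue                           # skip blanks and comment-only lines
            key, value = (part.strip() for part in line.split(":", 1))
            params[key] = value                    # values kept as strings
    return params

# e.g. read_ruby_defaults("rubyconfig.defaults")["g_SEQUENCER_OUTSTANDING_REQUESTS"] == "16"
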
PROTOCOL_DEBUG_TRACE: true
DEBUG_FILTER_STRING: none
DEBUG_VERBOSITY_STRING: none
DEBUG_START_TIME: 0
DEBUG_OUTPUT_FILENAME: none


TRANSACTION_TRACE_ENABLED: false
USER_MODE_DATA_ONLY: false
PROFILE_HOT_LINES: false

PROFILE_ALL_INSTRUCTIONS: false
PRINT_INSTRUCTION_TRACE: false
g_DEBUG_CYCLE: 0
BLOCK_STC: false
PERFECT_MEMORY_SYSTEM: false
PERFECT_MEMORY_SYSTEM_LATENCY: 0
DATA_BLOCK: false


// *********************************************
// CACHE & MEMORY PARAMETERS
// *********************************************


L1_CACHE_ASSOC: 4
L1_CACHE_NUM_SETS_BITS: 8
L2_CACHE_ASSOC: 4
L2_CACHE_NUM_SETS_BITS: 16

// 32 bits = 4 GB address space
g_MEMORY_SIZE_BYTES: 4294967296
g_DATA_BLOCK_BYTES: 64
g_PAGE_SIZE_BYTES: 4096
g_REPLACEMENT_POLICY: PSEDUO_LRU // currently, the only other option is LRU

g_PROCS_PER_CHIP: 1


// set automatically
g_NUM_PROCESSORS: 0
g_NUM_L2_BANKS: 0
g_NUM_MEMORIES: 0

// The following group of parameters are calculated. They must
// _always_ be left at zero.
g_NUM_CHIPS: 0
g_NUM_CHIP_BITS: 0
g_MEMORY_SIZE_BITS: 0
g_DATA_BLOCK_BITS: 0
g_PAGE_SIZE_BITS: 0
g_NUM_PROCESSORS_BITS: 0
g_PROCS_PER_CHIP_BITS: 0
g_NUM_L2_BANKS_BITS: 0
g_NUM_L2_BANKS_PER_CHIP: 0
g_NUM_L2_BANKS_PER_CHIP_BITS: 0
g_NUM_MEMORIES_BITS: 0
g_NUM_MEMORIES_PER_CHIP: 0
g_MEMORY_MODULE_BITS: 0
g_MEMORY_MODULE_BLOCKS: 0


// For certain CMP protocols, determines whether the lowest bits of a block
// address are used to index into an L2 cache bank or into the sets of a
// single bank
//         lowest                                                     highest
// true:   g_DATA_BLOCK_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS | L2_CACHE_NUM_SETS_BITS
// false:  g_DATA_BLOCK_BITS | L2_CACHE_NUM_SETS_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS
MAP_L2BANKS_TO_LOWEST_BITS: false



// TIMING PARAMETERS -- many of these are protocol specific. See SLICC files
// to determine where they apply

MEMORY_RESPONSE_LATENCY_MINUS_2: 158 // determines memory response latency
DIRECTORY_CACHE_LATENCY: 6
NULL_LATENCY: 1
ISSUE_LATENCY: 2
CACHE_RESPONSE_LATENCY: 12
L1_RESPONSE_LATENCY: 3
L2_RESPONSE_LATENCY: 6
L2_TAG_LATENCY: 6
DIRECTORY_LATENCY: 80
NETWORK_LINK_LATENCY: 1
COPY_HEAD_LATENCY: 4
ON_CHIP_LINK_LATENCY: 1
RECYCLE_LATENCY: 10
L2_RECYCLE_LATENCY: 5
TIMER_LATENCY: 10000
TBE_RESPONSE_LATENCY: 1
PERIODIC_TIMER_WAKEUPS: true


// constants used by CMP protocols
// cache bank access times
L1_REQUEST_LATENCY: 2
L2_REQUEST_LATENCY: 4




// Number of transitions each controller state machine can complete per cycle,
// i.e. the number of ports to each controller
// L1cache is the sum of the L1I and L1D cache ports
L1CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
// Note: if SINGLE_ACCESS_L2_BANKS is enabled, this will probably enforce a
// much greater constraint on the concurrency of an L2 cache bank
L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 32


// Number of TBEs available for demand misses, ALL prefetches, and replacements
// used by one-level protocols
NUMBER_OF_TBES: 128
// two-level protocols
NUMBER_OF_L1_TBES: 32
NUMBER_OF_L2_TBES: 32

// TSO is deprecated
TSO: false


// ** INTERCONNECT PARAMETERS **
//
g_PRINT_TOPOLOGY: true
g_NETWORK_TOPOLOGY: HIERARCHICAL_SWITCH
g_CACHE_DESIGN: NUCA // specifies file prefix for FILE_SPECIFIED topology
FAN_OUT_DEGREE: 4 // for HIERARCHICAL SWITCH topology

g_adaptive_routing: true
NUMBER_OF_VIRTUAL_NETWORKS: 4

// bandwidth unit is 1/1000 byte per cycle. the following parameter is
// multiplied by topology-specific link weights
g_endpoint_bandwidth: 10000
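
As a quick sanity check of the cache-geometry defaults above: each cache is described by an associativity and a power-of-two set count, with g_DATA_BLOCK_BYTES giving the line size, so the implied capacities follow from the usual associativity * sets * line-size relation. This is illustrative arithmetic only, not simulator code, and whether the L2 figure is per bank or per chip depends on the bank count chosen by the protocol configuration:

# Illustrative arithmetic for the defaults above, not simulator code.
block_bytes = 64                          # g_DATA_BLOCK_BYTES

l1_bytes = 4 * (1 << 8)  * block_bytes    # L1_CACHE_ASSOC * 2**L1_CACHE_NUM_SETS_BITS * line
l2_bytes = 4 * (1 << 16) * block_bytes    # L2_CACHE_ASSOC * 2**L2_CACHE_NUM_SETS_BITS * line

print(l1_bytes // 1024)                   # 64 -> 64 KB of L1
print(l2_bytes // (1024 * 1024))          # 16 -> 16 MB of L2
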
// ** finite buffering parameters
//
// note: Finite buffering allows us to simulate a realistic virtual cut-through
// routed network with idealized flow control. this feature is NOT heavily tested
FINITE_BUFFERING: false
// All message buffers within the network (i.e. the switch's input and
// output buffers) are set to the size specified below by FINITE_BUFFER_SIZE
FINITE_BUFFER_SIZE: 3
// g_SEQUENCER_OUTSTANDING_REQUESTS (above) controls the number of demand requests
// issued by the sequencer. The PROCESSOR_BUFFER_SIZE controls the
// number of requests in the mandatory queue.
// Only affects the simulation when FINITE_BUFFERING is enabled
PROCESSOR_BUFFER_SIZE: 10
// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
// controllers. Controls the number of requests issued by the L2 HW prefetcher
PROTOCOL_BUFFER_SIZE: 32
// ** end finite buffering parameters


// (deprecated)
// Allows only a single access to a multi-cycle L2 bank.
// Ensures the cache array is only accessed once every L2_REQUEST_LATENCY
// cycles. However, the TBE table can be accessed in parallel.
SINGLE_ACCESS_L2_BANKS: true


// constants used by TM protocols
PROFILE_EXCEPTIONS: false
PROFILE_XACT: true
PROFILE_NONXACT: false
XACT_DEBUG: true
XACT_DEBUG_LEVEL: 1
//XACT_MEMORY: true // set to true for TM protocols. set it HERE for lazy systems to register the proper SIMICS interfaces
XACT_MEMORY: false
XACT_ENABLE_TOURMALINE: false // perfect memory system
XACT_NUM_CURRENT: 0 // must be 0
XACT_LAST_UPDATE: 0 // must be 0
XACT_ISOLATION_CHECK: false // Checks whether each memory access preserves transaction isolation
PERFECT_FILTER: true // If true, use perfect physical read/write filters
READ_WRITE_FILTER: Perfect_
PERFECT_VIRTUAL_FILTER: true // If true, use perfect virtual read/write filters
VIRTUAL_READ_WRITE_FILTER: Perfect_
PERFECT_SUMMARY_FILTER: true // If true, use perfect summary read/write filters
SUMMARY_READ_WRITE_FILTER: Perfect_
XACT_EAGER_CD: true
XACT_LAZY_VM: false
XACT_CONFLICT_RES: BASE
XACT_COMMIT_TOKEN_LATENCY: 0
XACT_VISUALIZER: false
XACT_NO_BACKOFF: false
XACT_LOG_BUFFER_SIZE: 0
XACT_STORE_PREDICTOR_ENTRIES: 256
XACT_STORE_PREDICTOR_HISTORY: 256
XACT_STORE_PREDICTOR_THRESHOLD: 4
XACT_FIRST_ACCESS_COST: 0
XACT_FIRST_PAGE_ACCESS_COST: 0
ENABLE_MAGIC_WAITING: false
ENABLE_WATCHPOINT: false
XACT_ENABLE_VIRTUALIZATION_LOGTM_SE: false
// g_NETWORK_TOPOLOGY: FILE_SPECIFIED
// NUMBER_OF_VIRTUAL_NETWORKS: 5
// L2_REQUEST_LATENCY: 15
// SEQUENCER_TO_CONTROLLER_LATENCY: 3
// L2_RESPONSE_LATENCY: 20
// L2_TAG_LATENCY: 6
// MEMORY_RESPONSE_LATENCY_MINUS_2: 448
// RECYCLE_LATENCY: 1
// g_MEMORY_SIZE_BYTES: 268435456
// REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: true

// ATMTP
ATMTP_ENABLED: false
ATMTP_ABORT_ON_NON_XACT_INST: false
ATMTP_ALLOW_SAVE_RESTORE_IN_XACT: false
ATMTP_XACT_MAX_STORES: 32
ATMTP_DEBUG_LEVEL: 0

// MOESI_CMP_token parameters (some might be deprecated)
g_FILTERING_ENABLED: false
g_DISTRIBUTED_PERSISTENT_ENABLED: true
g_RETRY_THRESHOLD: 1
g_DYNAMIC_TIMEOUT_ENABLED: true
g_FIXED_TIMEOUT_LATENCY: 300


// tester parameters (overridden by testerconfig.defaults)
//
// injects random message delays to excite protocol races
RANDOMIZATION: false
g_SYNTHETIC_DRIVER: false
g_DETERMINISTIC_DRIVER: false
g_trace_warmup_length: 1000000
g_bash_bandwidth_adaptive_threshold: 0.75

g_tester_length: 0
// # of synthetic locks == 16 * 128
g_synthetic_locks: 2048
g_deterministic_addrs: 1
g_SpecifiedGenerator: DetermInvGenerator
g_callback_counter: 0
g_NUM_COMPLETIONS_BEFORE_PASS: 0
// parameters used by the locking synthetic tester
g_think_time: 5
g_hold_time: 5
g_wait_time: 5
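
The three locking-tester knobs just above are plain cycle counts. The sketch below is only an illustration of the terminology (think = work outside the critical section, hold = work while the lock is held, wait = retry interval on a failed acquire) and is not the actual GEMS tester implementation; try_acquire and release stand in for whatever lock primitives the tester drives:

# Hypothetical reading of the locking-tester knobs; NOT the GEMS tester itself.
import random

THINK_TIME = 5      # g_think_time: cycles of work outside the critical section
HOLD_TIME  = 5      # g_hold_time:  cycles of work while holding the lock
WAIT_TIME  = 5      # g_wait_time:  cycles between retries of a failed acquire
NUM_LOCKS  = 2048   # g_synthetic_locks (== 16 * 128)

def one_round(now, try_acquire, release):
    """Advance a synthetic processor through one think/acquire/hold/release round."""
    lock = random.randrange(NUM_LOCKS)
    now += THINK_TIME
    while not try_acquire(lock):    # keep retrying every WAIT_TIME cycles
        now += WAIT_TIME
    now += HOLD_TIME
    release(lock)
    return now
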
// Princeton Network (Garnet)
g_GARNET_NETWORK: false
g_DETAIL_NETWORK: false
g_NETWORK_TESTING: false
g_FLIT_SIZE: 16
g_NUM_PIPE_STAGES: 4
g_VCS_PER_CLASS: 4
g_BUFFER_SIZE: 4

///////////////////////////////////////////////////////////////////////////////
//
// MemoryControl:

// Basic cycle time of the memory controller. This defines the period which is
// used as the memory channel clock period, the address bus bit time, and the
// memory controller cycle time.
// Assuming a 200 MHz memory channel (DDR-400, which transfers data at 400 MT/s)
// and a 2 GHz Ruby clock:
MEM_BUS_CYCLE_MULTIPLIER: 10

// How many internal banks in each DRAM chip:
BANKS_PER_RANK: 8

// How many sets of DRAM chips per DIMM.
RANKS_PER_DIMM: 2

// How many DIMMs per channel. (Currently the only thing that
// matters is the number of ranks per channel, i.e. the product
// of this parameter and RANKS_PER_DIMM. But if and when this is
// expanded to do FB-DIMMs, the distinction between the two
// will matter.)
DIMMS_PER_CHANNEL: 2

// Which bits to use to find the bank, rank, and DIMM numbers.
// You could choose to have the bank bits, rank bits, and DIMM bits
// in any order; here they are in that order.
// For these defaults, we assume this format for addresses:
//   Offset within line:  [5:0]
//   Memory controller #: [7:6]
//   Bank:                [10:8]
//   Rank:                [11]
//   DIMM:                [12]
//   Row addr / Col addr: [top:13]
// If you get these bits wrong, then some banks won't see any
// requests; you need to check for this in the .stats output.
BANK_BIT_0: 8
RANK_BIT_0: 11
DIMM_BIT_0: 12

// Maximum number of entries in each bank queue; set to whatever you want.
// If it is too small, you will see in the .stats file a lot of delay
// time spent in the common input queue.
BANK_QUEUE_SIZE: 12

// Bank cycle time (tRC) measured in memory cycles:
BANK_BUSY_TIME: 11

// This is how many memory address cycles to delay between reads to
// different ranks of DRAMs to allow for clock skew:
RANK_RANK_DELAY: 1

// This is how many memory address cycles to delay between a read
// and a write. This is based on two things: (1) the data bus is
// used one cycle earlier in the operation; (2) a round-trip wire
// delay from the controller to the DIMM that did the reading.
READ_WRITE_DELAY: 2

// Basic address and data bus occupancy. If you are assuming a
// 16-byte-wide data bus (pairs of DIMMs side-by-side), then
// the data bus occupancy matches the address bus occupancy at
// two cycles. But if the channel is only 8 bytes wide, you
// need to increase this bus occupancy time to 4 cycles.
BASIC_BUS_BUSY_TIME: 2
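
Putting the bit-position defaults above together: with 8 banks, 2 ranks, and 2 DIMMs, the bank field is 3 bits starting at bit 8, and rank and DIMM are single bits at positions 11 and 12. The decode below is only an illustration, assuming each field is log2 of the corresponding count; the real decoding happens inside the simulator's C++ memory controller:

# Illustrative decode of a physical address using the bit positions above.
def decode(addr):
    line_off = addr & 0x3f          # [5:0]    offset within the 64-byte line
    mem_ctl  = (addr >> 6) & 0x3    # [7:6]    memory controller number
    bank     = (addr >> 8) & 0x7    # [10:8]   BANK_BIT_0 = 8, BANKS_PER_RANK = 8
    rank     = (addr >> 11) & 0x1   # [11]     RANK_BIT_0 = 11, RANKS_PER_DIMM = 2
    dimm     = (addr >> 12) & 0x1   # [12]     DIMM_BIT_0 = 12, DIMMS_PER_CHANNEL = 2
    row_col  = addr >> 13           # [top:13] row / column address
    return line_off, mem_ctl, bank, rank, dimm, row_col

# decode(0x12345) -> (5, 1, 3, 0, 0, 9)
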
// Latency of returning a read request or writeback acknowledgement.
// Measured in memory address cycles.
// This equals tRCD + CL + AL + (four bit times)
//             + (round trip on channel)
//             + (memory control internal delays)
// It's going to be an approximation, so pick what you like.
// Note: The fact that latency is a constant, and does not depend on two
// low-order address bits, implies that our memory controller either:
// (a) tells the DRAM to read the critical word first, and sends the
//     critical word first back to the CPU, or (b) waits until it has
//     seen all four bit times on the data wires before sending anything
//     back. Either is plausible. If (a), remove the "four bit times"
//     term from the calculation above.
MEM_CTL_LATENCY: 12

// refresh_period is the number of memory cycles between refresh
// of row x in bank n and refresh of row x+1 in bank n. For DDR-400,
// this is typically 7.8 usec for commercial systems; after 8192 such
// refreshes, this will have refreshed the whole chip in 64 msec. If
// we have a 5 nsec memory clock, 7800 / 5 = 1560 cycles. The memory
// controller will divide this by the total number of banks, and kick
// off a refresh to *somebody* every time that amount is counted
// down to zero. (There will be some rounding error there, but it
// should have minimal effect.)
REFRESH_PERIOD: 1560

// tFAW is a DRAM chip parameter which restricts the number of
// activates that can be done within a certain window of time.
// The window is specified here in terms of number of memory
// controller cycles. At most four activates may be done during
// any such sliding window. If this number is set to be no more
// than 4 * BASIC_BUS_BUSY_TIME, it will have no effect.
// It is typical in real systems for tFAW to have no effect, but
// it may be useful in throttling power. Set to zero to ignore.
TFAW: 0

// By default, the memory controller uses round-robin to arbitrate
// between ready bank queues for use of the address bus. If you
// wish to add randomness to the system, set this parameter to
// one instead, and it will restart the round-robin pointer at a
// random bank number each cycle. If you want additional
// nondeterminism, set the parameter to some integer n >= 2, and
// it will in addition add an n% chance each cycle that a ready bank
// will be delayed an additional cycle. Note that if you are
// in MEM_FIXED_DELAY mode (see below), MEM_RANDOM_ARBITRATE=1 will
// have no effect, but MEM_RANDOM_ARBITRATE=2 or more will.
MEM_RANDOM_ARBITRATE: 0

// The following parameter, if nonzero, will disable the memory
// controller and instead give every request a fixed latency. The
// nonzero value specified here is measured in memory cycles and is
// just added to MEM_CTL_LATENCY. It will also show up in the stats
// file as a contributor to memory_delays_stalled_at_head_of_bank_queue.
MEM_FIXED_DELAY: 0

// If instead of DDR-400 you wanted DDR-800, the channel gets faster
// but the basic operation of the DRAM core is unchanged.
// Busy times appear to double just because they are measured
// in smaller clock cycles. The performance advantage comes because
// the bus busy times don't actually quite double.
// You would use something like these values:
//
// MEM_BUS_CYCLE_MULTIPLIER: 5
// BANK_BUSY_TIME: 22
// RANK_RANK_DELAY: 2
// READ_WRITE_DELAY: 3
// BASIC_BUS_BUSY_TIME: 3
// MEM_CTL_LATENCY: 20
// REFRESH_PERIOD: 3120
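
The refresh and bus-cycle numbers above are easy to cross-check. With the 2 GHz Ruby clock assumed in the comments, MEM_BUS_CYCLE_MULTIPLIER: 10 gives a 5 ns memory cycle, so a 7.8 usec refresh interval is 1560 memory cycles; the commented-out DDR-800 block (multiplier 5, i.e. a 2.5 ns cycle) yields 3120 the same way. Purely illustrative arithmetic:

# Cross-check of the clocking comments above.
ruby_period_ns  = 0.5                        # 2 GHz Ruby clock, as assumed in the comments
ddr400_cycle_ns = 10 * ruby_period_ns        # MEM_BUS_CYCLE_MULTIPLIER: 10 -> 5 ns
ddr800_cycle_ns = 5  * ruby_period_ns        # commented-out DDR-800 value  -> 2.5 ns

refresh_ns = 7800                            # 7.8 usec between row refreshes
print(refresh_ns / ddr400_cycle_ns)          # 1560.0 -> REFRESH_PERIOD: 1560
print(refresh_ns / ddr800_cycle_ns)          # 3120.0 -> REFRESH_PERIOD: 3120 (DDR-800)
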