diff options
author | Nathan Binkert <nate@binkert.org> | 2009-05-11 10:38:43 -0700 |
---|---|---|
committer | Nathan Binkert <nate@binkert.org> | 2009-05-11 10:38:43 -0700 |
commit | 2f30950143cc70bc42a3c8a4111d7cf8198ec881 (patch) | |
tree | 708f6c22edb3c6feb31dd82866c26623a5329580 /src/mem/ruby/config/rubyconfig.defaults | |
parent | c70241810d4e4f523f173c1646b008dc40faad8e (diff) | |
download | gem5-2f30950143cc70bc42a3c8a4111d7cf8198ec881.tar.xz |
ruby: Import ruby and slicc from GEMS
We eventually plan to replace the m5 cache hierarchy with the GEMS
hierarchy, but for now we will make both live alongside eachother.
Diffstat (limited to 'src/mem/ruby/config/rubyconfig.defaults')
-rw-r--r-- | src/mem/ruby/config/rubyconfig.defaults | 466 |
1 files changed, 466 insertions, 0 deletions
diff --git a/src/mem/ruby/config/rubyconfig.defaults b/src/mem/ruby/config/rubyconfig.defaults new file mode 100644 index 000000000..3b86b4645 --- /dev/null +++ b/src/mem/ruby/config/rubyconfig.defaults @@ -0,0 +1,466 @@ +// +// This file has been modified by Kevin Moore and Dan Nussbaum of the +// Scalable Systems Research Group at Sun Microsystems Laboratories +// (http://research.sun.com/scalable/) to support the Adaptive +// Transactional Memory Test Platform (ATMTP). For information about +// ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/. +// +// Please send email to atmtp-interest@sun.com with feedback, questions, or +// to request future announcements about ATMTP. +// +// ---------------------------------------------------------------------- +// +// File modification date: 2008-02-23 +// +// ---------------------------------------------------------------------- +// +// ATMTP is distributed as part of the GEMS software toolset and is +// available for use and modification under the terms of version 2 of the +// GNU General Public License. The GNU General Public License is contained +// in the file $GEMS/LICENSE. +// +// Multifacet GEMS is free software; you can redistribute it and/or modify +// it under the terms of version 2 of the GNU General Public License as +// published by the Free Software Foundation. +// +// Multifacet GEMS is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with the Multifacet GEMS; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA +// +// ---------------------------------------------------------------------- +// + +g_RANDOM_SEED: 1 +g_SIMICS: true + +g_DEADLOCK_THRESHOLD: 500000 + +// determines how many Simics cycles advance for every Ruby cycle +// (does not apply when running Opal) +SIMICS_RUBY_MULTIPLIER: 4 + +// corresponding parameter when using Opal+Ruby+Simics +OPAL_RUBY_MULTIPLIER: 1 + + +// Ruby cycles between when a sequencer issues a request and it arrives at +// the L1 cache controller +// +// ** important ** this parameter determines the L2 hit latency when +// using the SMP protocols with a combined L1/L2 controller (-cache.sm) +// +SEQUENCER_TO_CONTROLLER_LATENCY: 4 + + +// When set to false, the L1 cache structures are probed for a hit in Sequencer.C +// If a request hits, it is *not* issued to the cache controller +// When set to true, all processor data requests issue to cache controller +// +// ** important ** this parameter must be set to false for proper L1/L2 hit timing +// for the SMP protocols with combined L1/L2 controllers (-cache.sm) +// +REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: false + + +// When running with Opal in SMT configurations, this indicates the number of threads per physical processor +g_NUM_SMT_THREADS: 1 + + +// Maximum number of requests (including SW prefetches) outstanding from +// the sequencer (Note: this also include items buffered in the store +// buffer) +g_SEQUENCER_OUTSTANDING_REQUESTS: 16 + + +PROTOCOL_DEBUG_TRACE: true +DEBUG_FILTER_STRING: none +DEBUG_VERBOSITY_STRING: none +DEBUG_START_TIME: 0 +DEBUG_OUTPUT_FILENAME: none + + +TRANSACTION_TRACE_ENABLED: false +USER_MODE_DATA_ONLY: false +PROFILE_HOT_LINES: false + +PROFILE_ALL_INSTRUCTIONS: false +PRINT_INSTRUCTION_TRACE: false +g_DEBUG_CYCLE: 0 +BLOCK_STC: false +PERFECT_MEMORY_SYSTEM: false +PERFECT_MEMORY_SYSTEM_LATENCY: 0 +DATA_BLOCK: false + + +// ********************************************* +// CACHE & MEMORY PARAMETERS +// ********************************************* + + +L1_CACHE_ASSOC: 4 +L1_CACHE_NUM_SETS_BITS: 8 +L2_CACHE_ASSOC: 4 +L2_CACHE_NUM_SETS_BITS: 16 + +// 32 bits = 4 GB address space +g_MEMORY_SIZE_BYTES: 4294967296 +g_DATA_BLOCK_BYTES: 64 +g_PAGE_SIZE_BYTES: 4096 +g_REPLACEMENT_POLICY: PSEDUO_LRU // currently, only other option is LRU + +g_PROCS_PER_CHIP: 1 + + +// set automatically +g_NUM_PROCESSORS: 0 +g_NUM_L2_BANKS: 0 +g_NUM_MEMORIES: 0 + +// The following group of parameters are calculated. They must +// _always_ be left at zero. +g_NUM_CHIPS: 0 +g_NUM_CHIP_BITS: 0 +g_MEMORY_SIZE_BITS: 0 +g_DATA_BLOCK_BITS: 0 +g_PAGE_SIZE_BITS: 0 +g_NUM_PROCESSORS_BITS: 0 +g_PROCS_PER_CHIP_BITS: 0 +g_NUM_L2_BANKS_BITS: 0 +g_NUM_L2_BANKS_PER_CHIP: 0 +g_NUM_L2_BANKS_PER_CHIP_BITS: 0 +g_NUM_MEMORIES_BITS: 0 +g_NUM_MEMORIES_PER_CHIP: 0 +g_MEMORY_MODULE_BITS: 0 +g_MEMORY_MODULE_BLOCKS: 0 + + +// For certain CMP protocols, determines whether the lowest bits of a block address +// are used to index to a L2 cache bank or into the sets of a +// single bank +// lowest highest +// true: g_DATA_BLOCK_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS | L2_CACHE_NUM_SETS_BITS +// false: g_DATA_BLOCK_BITS | L2_CACHE_NUM_SETS_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS +MAP_L2BANKS_TO_LOWEST_BITS: false + + + +// TIMING PARAMETERS -- many of these are protocol specific. See SLICC files +// to determine where they apply + +MEMORY_RESPONSE_LATENCY_MINUS_2: 158 // determines memory response latency +DIRECTORY_CACHE_LATENCY: 6 +NULL_LATENCY: 1 +ISSUE_LATENCY: 2 +CACHE_RESPONSE_LATENCY: 12 +L1_RESPONSE_LATENCY: 3 +L2_RESPONSE_LATENCY: 6 +L2_TAG_LATENCY: 6 +DIRECTORY_LATENCY: 80 +NETWORK_LINK_LATENCY: 1 +COPY_HEAD_LATENCY: 4 +ON_CHIP_LINK_LATENCY: 1 +RECYCLE_LATENCY: 10 +L2_RECYCLE_LATENCY: 5 +TIMER_LATENCY: 10000 +TBE_RESPONSE_LATENCY: 1 +PERIODIC_TIMER_WAKEUPS: true + + +// constants used by CMP protocols +// cache bank access times +L1_REQUEST_LATENCY: 2 +L2_REQUEST_LATENCY: 4 + + + + +// Number of transitions each controller state machines can complete per cycle +// i.e. the number of ports to each controller +// L1cache is the sum of the L1I and L1D cache ports +L1CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32 +// Note: if SINGLE_ACCESS_L2_BANKS is enabled, this will probably enforce a +// much greater constraint on the concurrency of a L2 cache bank +L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32 +DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 32 + + +// Number of TBEs available for demand misses, ALL prefetches, and replacements +// used by one-level protocols +NUMBER_OF_TBES: 128 +// two-level protocols +NUMBER_OF_L1_TBES: 32 +NUMBER_OF_L2_TBES: 32 + +// TSO is deprecated +TSO: false + + +// ** INTERCONECT PARAMETERS ** +// +g_PRINT_TOPOLOGY: true +g_NETWORK_TOPOLOGY: HIERARCHICAL_SWITCH +g_CACHE_DESIGN: NUCA // specifies file prefix for FILE_SPECIFIED topology +FAN_OUT_DEGREE: 4 // for HIERARCHICAL SWITCH topology + +g_adaptive_routing: true +NUMBER_OF_VIRTUAL_NETWORKS: 4 + +// bandwidth unit is 1/1000 byte per cycle. the following parameter is multiplied by +// topology specific link weights +g_endpoint_bandwidth: 10000 + + +// ** finite buffering parameters +// +// note: Finite buffering allows us to simulate a realistic virtual cut-through +// routed network with idealized flow control. this feature is NOT heavily tested +FINITE_BUFFERING: false +// All message buffers within the network (i.e. the switch's input and +// output buffers) are set to the size specified below by the FINITE_BUFFER_SIZE +FINITE_BUFFER_SIZE: 3 +// g_SEQUENCER_OUTSTANDING_REQUESTS (above) controlls the number of demand requests +// issued by the sequencer. The PROCESSOR_BUFFER_SIZE controlls the +// number of requests in the mandatory queue +// Only effects the simualtion when FINITE_BUFFERING is enabled +PROCESSOR_BUFFER_SIZE: 10 +// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to +// Controllers. Controlls the number of request issued by the L2 HW Prefetcher +PROTOCOL_BUFFER_SIZE: 32 +// ** end finite buffering parameters + + +// (deprecated) +// Allows on a single accesses to a multi-cycle L2 bank. +// Ensures the cache array is only accessed once for every L2_REQUEST_LATENCY +// number of cycles. However the TBE table can be accessed in parallel. +SINGLE_ACCESS_L2_BANKS: true + + +// constants used by TM protocols +PROFILE_EXCEPTIONS: false +PROFILE_XACT: true +PROFILE_NONXACT: false +XACT_DEBUG: true +XACT_DEBUG_LEVEL: 1 +//XACT_MEMORY: true // set to true for TM protocols. set it HERE for lazy systems to register the proper SIMICS interfaces +XACT_MEMORY: false +XACT_ENABLE_TOURMALINE: false // perfect memory system +XACT_NUM_CURRENT: 0 // must be 0 +XACT_LAST_UPDATE: 0 // must be 0 +XACT_ISOLATION_CHECK: false // Checks whether each memory access preserves transaction isolation +PERFECT_FILTER: true // If true, use perfect physical read/write filters +READ_WRITE_FILTER: Perfect_ +PERFECT_VIRTUAL_FILTER: true // If true, use perfect virtual read/write filters +VIRTUAL_READ_WRITE_FILTER: Perfect_ +PERFECT_SUMMARY_FILTER: true // If true, use perfect summary read/write filters +SUMMARY_READ_WRITE_FILTER: Perfect_ +XACT_EAGER_CD: true +XACT_LAZY_VM: false +XACT_CONFLICT_RES: BASE +XACT_COMMIT_TOKEN_LATENCY: 0 +XACT_VISUALIZER: false +XACT_NO_BACKOFF: false +XACT_LOG_BUFFER_SIZE: 0 +XACT_STORE_PREDICTOR_ENTRIES: 256 +XACT_STORE_PREDICTOR_HISTORY: 256 +XACT_STORE_PREDICTOR_THRESHOLD: 4 +XACT_FIRST_ACCESS_COST: 0 +XACT_FIRST_PAGE_ACCESS_COST: 0 +ENABLE_MAGIC_WAITING: false +ENABLE_WATCHPOINT: false +XACT_ENABLE_VIRTUALIZATION_LOGTM_SE: false +// g_NETWORK_TOPOLOGY: FILE_SPECIFIED +// NUMBER_OF_VIRTUAL_NETWORKS: 5 +// L2_REQUEST_LATENCY: 15 +// SEQUENCER_TO_CONTROLLER_LATENCY: 3 +// L2_RESPONSE_LATENCY: 20 +// L2_TAG_LATENCY: 6 +// MEMORY_RESPONSE_LATENCY_MINUS_2: 448 +// RECYCLE_LATENCY: 1 +// g_MEMORY_SIZE_BYTES: 268435456 +// REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: true + +// ATMTP +ATMTP_ENABLED: false +ATMTP_ABORT_ON_NON_XACT_INST: false +ATMTP_ALLOW_SAVE_RESTORE_IN_XACT: false +ATMTP_XACT_MAX_STORES: 32 +ATMTP_DEBUG_LEVEL: 0 + +// MOESI_CMP_token parameters (some might be deprecated) +g_FILTERING_ENABLED: false +g_DISTRIBUTED_PERSISTENT_ENABLED: true +g_RETRY_THRESHOLD: 1 +g_DYNAMIC_TIMEOUT_ENABLED: true +g_FIXED_TIMEOUT_LATENCY: 300 + + +// tester parameters (overridden by testerconfig.defaults) +// +// injects random message delays to excite protocol races +RANDOMIZATION: false +g_SYNTHETIC_DRIVER: false +g_DETERMINISTIC_DRIVER: false +g_trace_warmup_length: 1000000 +g_bash_bandwidth_adaptive_threshold: 0.75 + +g_tester_length: 0 +// # of synthetic locks == 16 * 128 +g_synthetic_locks: 2048 +g_deterministic_addrs: 1 +g_SpecifiedGenerator: DetermInvGenerator +g_callback_counter: 0 +g_NUM_COMPLETIONS_BEFORE_PASS: 0 +// parameters used by locking synthetic tester +g_think_time: 5 +g_hold_time: 5 +g_wait_time: 5 + +// Princeton Network (Garnet) +g_GARNET_NETWORK: false +g_DETAIL_NETWORK: false +g_NETWORK_TESTING: false +g_FLIT_SIZE: 16 +g_NUM_PIPE_STAGES: 4 +g_VCS_PER_CLASS: 4 +g_BUFFER_SIZE: 4 + +/////////////////////////////////////////////////////////////////////////////// +// +// MemoryControl: + +// Basic cycle time of the memory controller. This defines the period which is +// used as the memory channel clock period, the address bus bit time, and the +// memory controller cycle time. +// Assuming a 200 MHz memory channel (DDR-400, which has 400 bits/sec data), +// and a 2 GHz Ruby clock: +MEM_BUS_CYCLE_MULTIPLIER: 10 + +// How many internal banks in each DRAM chip: +BANKS_PER_RANK: 8 + +// How many sets of DRAM chips per DIMM. +RANKS_PER_DIMM: 2 + +// How many DIMMs per channel. (Currently the only thing that +// matters is the number of ranks per channel, i.e. the product +// of this parameter and RANKS_PER_DIMM. But if and when this is +// expanded to do FB-DIMMs, the distinction between the two +// will matter.) +DIMMS_PER_CHANNEL: 2 + +// Which bits to use to find the bank, rank, and DIMM numbers. +// You could choose to have the bank bits, rank bits, and DIMM bits +// in any order; here they are in that order. +// For these defaults, we assume this format for addresses: +// Offset within line: [5:0] +// Memory controller #: [7:6] +// Bank: [10:8] +// Rank: [11] +// DIMM: [12] +// Row addr / Col addr: [top:13] +// If you get these bits wrong, then some banks won't see any +// requests; you need to check for this in the .stats output. +BANK_BIT_0: 8 +RANK_BIT_0: 11 +DIMM_BIT_0: 12 + +// Number of entries max in each bank queues; set to whatever you want. +// If it is too small, you will see in the .stats file a lot of delay +// time spent in the common input queue. +BANK_QUEUE_SIZE: 12 + +// Bank cycle time (tRC) measured in memory cycles: +BANK_BUSY_TIME: 11 + +// This is how many memory address cycles to delay between reads to +// different ranks of DRAMs to allow for clock skew: +RANK_RANK_DELAY: 1 + +// This is how many memory address cycles to delay between a read +// and a write. This is based on two things: (1) the data bus is +// used one cycle earlier in the operation; (2) a round-trip wire +// delay from the controller to the DIMM that did the reading. +READ_WRITE_DELAY: 2 + +// Basic address and data bus occupancy. If you are assuming a +// 16-byte-wide data bus (pairs of DIMMs side-by-side), then +// the data bus occupancy matches the address bus occupancy at +// two cycles. But if the channel is only 8 bytes wide, you +// need to increase this bus occupancy time to 4 cycles. +BASIC_BUS_BUSY_TIME: 2 + +// Latency to returning read request or writeback acknowledgement. +// Measured in memory address cycles. +// This equals tRCD + CL + AL + (four bit times) +// + (round trip on channel) +// + (memory control internal delays) +// It's going to be an approximation, so pick what you like. +// Note: The fact that latency is a constant, and does not depend on two +// low-order address bits, implies that our memory controller either: +// (a) tells the DRAM to read the critical word first, and sends the +// critical word first back to the CPU, or (b) waits until it has +// seen all four bit times on the data wires before sending anything +// back. Either is plausible. If (a), remove the "four bit times" +// term from the calculation above. +MEM_CTL_LATENCY: 12 + +// refresh_period is the number of memory cycles between refresh +// of row x in bank n and refresh of row x+1 in bank n. For DDR-400, +// this is typically 7.8 usec for commercial systems; after 8192 such +// refreshes, this will have refreshed the whole chip in 64 msec. If +// we have a 5 nsec memory clock, 7800 / 5 = 1560 cycles. The memory +// controller will divide this by the total number of banks, and kick +// off a refresh to *somebody* every time that amount is counted +// down to zero. (There will be some rounding error there, but it +// should have minimal effect.) +REFRESH_PERIOD: 1560 + +// tFAW is a DRAM chip parameter which restricts the number of +// activates that can be done within a certain window of time. +// The window is specified here in terms of number of memory +// controller cycles. At most four activates may be done during +// any such sliding window. If this number is set to be no more +// than 4 * BASIC_BUS_BUSY_TIME, it will have no effect. +// It is typical in real systems for tFAW to have no effect, but +// it may be useful in throttling power. Set to zero to ignore. +TFAW: 0 + +// By default, the memory controller uses round-robin to arbitrate +// between ready bank queues for use of the address bus. If you +// wish to add randomness to the system, set this parameter to +// one instead, and it will restart the round-robin pointer at a +// random bank number each cycle. If you want additional +// nondeterminism, set the parameter to some integer n >= 2, and +// it will in addition add a n% chance each cycle that a ready bank +// will be delayed an additional cycle. Note that if you are +// in MEM_FIXED_DELAY mode (see below), MEM_RANDOM_ARBITRATE=1 will +// have no effect, but MEM_RANDOM_ARBITRATE=2 or more will. +MEM_RANDOM_ARBITRATE: 0 + +// The following parameter, if nonzero, will disable the memory +// controller and instead give every request a fixed latency. The +// nonzero value specified here is measured in memory cycles and is +// just added to MEM_CTL_LATENCY. It will also show up in the stats +// file as a contributor to memory_delays_stalled_at_head_of_bank_queue. +MEM_FIXED_DELAY: 0 + +// If instead of DDR-400, you wanted DDR-800, the channel gets faster +// but the basic operation of the DRAM core is unchanged. +// Busy times appear to double just because they are measured +// in smaller clock cycles. The performance advantage comes because +// the bus busy times don't actually quite double. +// You would use something like these values: +// +// MEM_BUS_CYCLE_MULTIPLIER: 5 +// BANK_BUSY_TIME: 22 +// RANK_RANK_DELAY: 2 +// READ_WRITE_DELAY: 3 +// BASIC_BUS_BUSY_TIME: 3 +// MEM_CTL_LATENCY: 20 +// REFRESH_PERIOD: 3120 |