summaryrefslogtreecommitdiff
path: root/src/mem/ruby/config/rubyconfig.defaults
blob: 873192c056455c8db61e0a40aa07e71b6ea8768a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
//
// This file has been modified by Kevin Moore and Dan Nussbaum of the
// Scalable Systems Research Group at Sun Microsystems Laboratories
// (http://research.sun.com/scalable/) to support the Adaptive
// Transactional Memory Test Platform (ATMTP).  For information about
// ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/.
//
// Please send email to atmtp-interest@sun.com with feedback, questions, or
// to request future announcements about ATMTP.
//
// ----------------------------------------------------------------------
//
// File modification date: 2008-02-23
//
// ----------------------------------------------------------------------
//
// ATMTP is distributed as part of the GEMS software toolset and is
// available for use and modification under the terms of version 2 of the
// GNU General Public License.  The GNU General Public License is contained
// in the file $GEMS/LICENSE.
//
// Multifacet GEMS is free software; you can redistribute it and/or modify
// it under the terms of version 2 of the GNU General Public License as
// published by the Free Software Foundation.
//
// Multifacet GEMS is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with the Multifacet GEMS; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
//
// ----------------------------------------------------------------------
//

g_RANDOM_SEED: 1

g_DEADLOCK_THRESHOLD: 500000

// determines how many Simics cycles advance for every Ruby cycle
//  (does not apply when running Opal)
SIMICS_RUBY_MULTIPLIER: 4

// corresponding parameter when using Opal+Ruby+Simics
OPAL_RUBY_MULTIPLIER: 1


// Ruby cycles between when a sequencer issues a request and it arrives at
// the L1 cache controller
//
// ** important ** this parameter determines the L2 hit latency when
//  using the SMP protocols with a combined L1/L2 controller (-cache.sm)
//
SEQUENCER_TO_CONTROLLER_LATENCY: 4


// When set to false, the L1 cache structures are probed for a hit in Sequencer.C
//  If a request hits, it is *not* issued to the cache controller
// When set to true, all processor data requests issue to cache controller
//
// ** important ** this parameter must be set to false for proper L1/L2 hit timing
//  for the SMP protocols with combined L1/L2 controllers (-cache.sm)
//
REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: false


// When running with Opal in SMT configurations, this indicates the number of threads per physical processor
g_NUM_SMT_THREADS: 1


// Maximum number of requests (including SW prefetches) outstanding from
// the sequencer (Note: this also includes items buffered in the store
// buffer)
g_SEQUENCER_OUTSTANDING_REQUESTS: 16


PROTOCOL_DEBUG_TRACE: true
DEBUG_FILTER_STRING: none
DEBUG_VERBOSITY_STRING: none
DEBUG_START_TIME: 0
DEBUG_OUTPUT_FILENAME: none


TRANSACTION_TRACE_ENABLED: false
USER_MODE_DATA_ONLY: false
PROFILE_HOT_LINES: false

PROFILE_ALL_INSTRUCTIONS: false
PRINT_INSTRUCTION_TRACE: false
g_DEBUG_CYCLE: 0
BLOCK_STC: false
PERFECT_MEMORY_SYSTEM: false
PERFECT_MEMORY_SYSTEM_LATENCY: 0
DATA_BLOCK: false


// *********************************************
// CACHE & MEMORY PARAMETERS
// *********************************************


L1_CACHE_ASSOC: 4
L1_CACHE_NUM_SETS_BITS: 8
L2_CACHE_ASSOC: 4
L2_CACHE_NUM_SETS_BITS: 16

// 32 bits = 4 GB address space
g_MEMORY_SIZE_BYTES: 4294967296
g_DATA_BLOCK_BYTES: 64
g_PAGE_SIZE_BYTES: 4096
g_REPLACEMENT_POLICY: PSEDUO_LRU // currently, only other option is LRU

g_PROCS_PER_CHIP: 1


// set automatically
g_NUM_PROCESSORS: 0
g_NUM_L2_BANKS: 0
g_NUM_MEMORIES: 0

// The following group of parameters are calculated.  They must
// _always_ be left at zero.
g_NUM_CHIPS: 0
g_NUM_CHIP_BITS: 0
g_MEMORY_SIZE_BITS: 0
g_DATA_BLOCK_BITS: 0
g_PAGE_SIZE_BITS: 0
g_NUM_PROCESSORS_BITS: 0
g_PROCS_PER_CHIP_BITS: 0
g_NUM_L2_BANKS_BITS: 0
g_NUM_L2_BANKS_PER_CHIP: 0
g_NUM_L2_BANKS_PER_CHIP_BITS: 0
g_NUM_MEMORIES_BITS: 0
g_NUM_MEMORIES_PER_CHIP: 0
g_MEMORY_MODULE_BITS: 0
g_MEMORY_MODULE_BLOCKS: 0


// For certain CMP protocols, determines whether the lowest bits of a block address
// are used to select an L2 cache bank or to index into the sets of a
// single bank
//        lowest                                                             highest
// true:   g_DATA_BLOCK_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS | L2_CACHE_NUM_SETS_BITS
// false:  g_DATA_BLOCK_BITS | L2_CACHE_NUM_SETS_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS
MAP_L2BANKS_TO_LOWEST_BITS: false



// TIMING PARAMETERS  -- many of these are protocol specific.  See SLICC files
//                       to determine where they apply

MEMORY_RESPONSE_LATENCY_MINUS_2: 158  // determines memory response latency
DIRECTORY_CACHE_LATENCY: 6
NULL_LATENCY: 1
ISSUE_LATENCY: 2
CACHE_RESPONSE_LATENCY: 12
L1_RESPONSE_LATENCY: 3
L2_RESPONSE_LATENCY: 6
L2_TAG_LATENCY: 6
DIRECTORY_LATENCY: 80
NETWORK_LINK_LATENCY: 1
COPY_HEAD_LATENCY: 4
ON_CHIP_LINK_LATENCY: 1
RECYCLE_LATENCY: 10
L2_RECYCLE_LATENCY: 5
TIMER_LATENCY: 10000
TBE_RESPONSE_LATENCY: 1
PERIODIC_TIMER_WAKEUPS: true


// constants used by CMP protocols
// cache bank access times
L1_REQUEST_LATENCY: 2
L2_REQUEST_LATENCY: 4




// Number of transitions each controller state machine can complete per cycle
// i.e. the number of ports to each controller
// L1cache is the sum of the L1I and L1D cache ports
L1CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
// Note: if SINGLE_ACCESS_L2_BANKS is enabled, this will probably enforce a
// much greater constraint on the concurrency of a L2 cache bank
L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 32


// Number of TBEs available for demand misses, ALL prefetches, and replacements
// used by one-level protocols
NUMBER_OF_TBES: 128
// two-level protocols
NUMBER_OF_L1_TBES: 32
NUMBER_OF_L2_TBES: 32

// TSO is deprecated
TSO: false


// ** INTERCONNECT PARAMETERS **
//
g_PRINT_TOPOLOGY: true
g_NETWORK_TOPOLOGY: HIERARCHICAL_SWITCH
g_CACHE_DESIGN: NUCA  // specifies file prefix for FILE_SPECIFIED topology
FAN_OUT_DEGREE: 4  // for HIERARCHICAL SWITCH topology

g_adaptive_routing: true
NUMBER_OF_VIRTUAL_NETWORKS: 4

// bandwidth unit is 1/1000 byte per cycle.  the following parameter is multiplied by
//  topology specific link weights
g_endpoint_bandwidth: 10000


// ** finite buffering parameters
//
// note: Finite buffering allows us to simulate a realistic virtual cut-through
// routed network with idealized flow control.  this feature is NOT heavily tested
FINITE_BUFFERING: false
// All message buffers within the network (i.e. the switch's input and
// output buffers) are set to the size specified below by the FINITE_BUFFER_SIZE
FINITE_BUFFER_SIZE: 3
// g_SEQUENCER_OUTSTANDING_REQUESTS (above) controls the number of demand requests
// issued by the sequencer.  The PROCESSOR_BUFFER_SIZE controls the
// number of requests in the mandatory queue.
// Only affects the simulation when FINITE_BUFFERING is enabled
PROCESSOR_BUFFER_SIZE: 10
// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
// Controllers.  Controls the number of requests issued by the L2 HW Prefetcher
PROTOCOL_BUFFER_SIZE: 32
// ** end finite buffering parameters


// (deprecated)
// Allows only a single access at a time to a multi-cycle L2 bank.
// Ensures the cache array is only accessed once for every L2_REQUEST_LATENCY
// number of cycles.  However the TBE table can be accessed in parallel.
SINGLE_ACCESS_L2_BANKS: true


// constants used by TM protocols
PROFILE_EXCEPTIONS: false
PROFILE_XACT: true
PROFILE_NONXACT: false
XACT_DEBUG: true
XACT_DEBUG_LEVEL: 1
//XACT_MEMORY: true  // set to true for TM protocols. set it HERE for lazy systems to register the proper SIMICS interfaces
XACT_MEMORY: false
XACT_ENABLE_TOURMALINE: false // perfect memory system
XACT_NUM_CURRENT: 0 // must be 0
XACT_LAST_UPDATE: 0 // must be 0
XACT_ISOLATION_CHECK: false // Checks whether each memory access preserves transaction isolation
PERFECT_FILTER: true                // If true, use perfect physical read/write filters
READ_WRITE_FILTER: Perfect_
PERFECT_VIRTUAL_FILTER: true        // If true, use perfect virtual read/write filters
VIRTUAL_READ_WRITE_FILTER: Perfect_
PERFECT_SUMMARY_FILTER: true        // If true, use perfect summary read/write filters
SUMMARY_READ_WRITE_FILTER: Perfect_
XACT_EAGER_CD: true
XACT_LAZY_VM: false
XACT_CONFLICT_RES: BASE
XACT_COMMIT_TOKEN_LATENCY: 0
XACT_VISUALIZER: false
XACT_NO_BACKOFF: false
XACT_LOG_BUFFER_SIZE: 0
XACT_STORE_PREDICTOR_ENTRIES: 256
XACT_STORE_PREDICTOR_HISTORY: 256
XACT_STORE_PREDICTOR_THRESHOLD: 4
XACT_FIRST_ACCESS_COST: 0
XACT_FIRST_PAGE_ACCESS_COST: 0
ENABLE_MAGIC_WAITING: false
ENABLE_WATCHPOINT: false
XACT_ENABLE_VIRTUALIZATION_LOGTM_SE: false
// g_NETWORK_TOPOLOGY: FILE_SPECIFIED
// NUMBER_OF_VIRTUAL_NETWORKS: 5
// L2_REQUEST_LATENCY: 15
// SEQUENCER_TO_CONTROLLER_LATENCY: 3
// L2_RESPONSE_LATENCY: 20
// L2_TAG_LATENCY: 6
// MEMORY_RESPONSE_LATENCY_MINUS_2: 448
// RECYCLE_LATENCY: 1
// g_MEMORY_SIZE_BYTES: 268435456
// REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: true

// ATMTP
ATMTP_ENABLED: false
ATMTP_ABORT_ON_NON_XACT_INST: false
ATMTP_ALLOW_SAVE_RESTORE_IN_XACT: false
ATMTP_XACT_MAX_STORES: 32
ATMTP_DEBUG_LEVEL: 0

// MOESI_CMP_token parameters (some might be deprecated)
g_FILTERING_ENABLED: false
g_DISTRIBUTED_PERSISTENT_ENABLED: true
g_RETRY_THRESHOLD: 1
g_DYNAMIC_TIMEOUT_ENABLED: true
g_FIXED_TIMEOUT_LATENCY: 300


// tester parameters (overridden by testerconfig.defaults)
//
//  injects random message delays to excite protocol races
RANDOMIZATION: false
g_SYNTHETIC_DRIVER: false
g_DETERMINISTIC_DRIVER: false
g_trace_warmup_length: 1000000
g_bash_bandwidth_adaptive_threshold: 0.75

g_tester_length: 0
// # of synthetic locks == 16 * 128
g_synthetic_locks: 2048
g_deterministic_addrs: 1
g_SpecifiedGenerator: DetermInvGenerator
g_callback_counter: 0
g_NUM_COMPLETIONS_BEFORE_PASS: 0
// parameters used by locking synthetic tester
g_think_time: 5
g_hold_time:  5
g_wait_time:  5

// Princeton Network (Garnet)
g_GARNET_NETWORK: false
g_DETAIL_NETWORK: false
g_NETWORK_TESTING: false
g_FLIT_SIZE: 16
g_NUM_PIPE_STAGES: 4
g_VCS_PER_CLASS: 4
g_BUFFER_SIZE: 4

///////////////////////////////////////////////////////////////////////////////
//
// MemoryControl:

// Basic cycle time of the memory controller.  This defines the period which is
// used as the memory channel clock period, the address bus bit time, and the
// memory controller cycle time.
// Assuming a 200 MHz memory channel (DDR-400, which transfers 400 Mbits/sec per data pin),
// and a 2 GHz Ruby clock:
MEM_BUS_CYCLE_MULTIPLIER: 10

// How many internal banks in each DRAM chip:
BANKS_PER_RANK: 8

// How many sets of DRAM chips per DIMM.
RANKS_PER_DIMM: 2

// How many DIMMs per channel.  (Currently the only thing that
// matters is the number of ranks per channel, i.e. the product
// of this parameter and RANKS_PER_DIMM.  But if and when this is
// expanded to do FB-DIMMs, the distinction between the two
// will matter.)
DIMMS_PER_CHANNEL: 2

// Which bits to use to find the bank, rank, and DIMM numbers.
// You could choose to have the bank bits, rank bits, and DIMM bits
// in any order; here they are in that order.
// For these defaults, we assume this format for addresses:
//    Offset within line:     [5:0]
//    Memory controller #:    [7:6]
//    Bank:                  [10:8]
//    Rank:                    [11]
//    DIMM:                    [12]
//    Row addr / Col addr: [top:13]
// If you get these bits wrong, then some banks won't see any
// requests; you need to check for this in the .stats output.
BANK_BIT_0: 8
RANK_BIT_0: 11
DIMM_BIT_0: 12

// Maximum number of entries in each bank queue; set to whatever you want.
// If it is too small, you will see in the .stats file a lot of delay
// time spent in the common input queue.
BANK_QUEUE_SIZE: 12

// Bank cycle time (tRC) measured in memory cycles:
BANK_BUSY_TIME: 11

// This is how many memory address cycles to delay between reads to
// different ranks of DRAMs to allow for clock skew:
RANK_RANK_DELAY: 1

// This is how many memory address cycles to delay between a read
// and a write.  This is based on two things:  (1) the data bus is
// used one cycle earlier in the operation; (2) a round-trip wire
// delay from the controller to the DIMM that did the reading.
READ_WRITE_DELAY: 2

// Basic address and data bus occupancy.  If you are assuming a
// 16-byte-wide data bus (pairs of DIMMs side-by-side), then
// the data bus occupancy matches the address bus occupancy at
// two cycles.  But if the channel is only 8 bytes wide, you
// need to increase this bus occupancy time to 4 cycles.
BASIC_BUS_BUSY_TIME: 2

// Latency for returning a read request or writeback acknowledgement.
// Measured in memory address cycles.
// This equals tRCD + CL + AL + (four bit times)
//                            + (round trip on channel)
//                            + (memory control internal delays)
// It's going to be an approximation, so pick what you like.
// Note:  The fact that latency is a constant, and does not depend on two
// low-order address bits, implies that our memory controller either:
// (a) tells the DRAM to read the critical word first, and sends the
// critical word first back to the CPU, or (b) waits until it has
// seen all four bit times on the data wires before sending anything
// back.  Either is plausible.  If (a), remove the "four bit times"
// term from the calculation above.
MEM_CTL_LATENCY: 12

// refresh_period is the number of memory cycles between refresh
// of row x in bank n and refresh of row x+1 in bank n.  For DDR-400,
// this is typically 7.8 usec for commercial systems; after 8192 such
// refreshes, this will have refreshed the whole chip in 64 msec.  If
// we have a 5 nsec memory clock, 7800 / 5 = 1560 cycles.  The memory
// controller will divide this by the total number of banks, and kick
// off a refresh to *somebody* every time that amount is counted
// down to zero. (There will be some rounding error there, but it
// should have minimal effect.)
REFRESH_PERIOD: 1560

// tFAW is a DRAM chip parameter which restricts the number of
// activates that can be done within a certain window of time.
// The window is specified here in terms of number of memory
// controller cycles.  At most four activates may be done during
// any such sliding window.  If this number is set to be no more
// than 4 * BASIC_BUS_BUSY_TIME, it will have no effect.
// It is typical in real systems for tFAW to have no effect, but
// it may be useful in throttling power.  Set to zero to ignore.
TFAW: 0

// By default, the memory controller uses round-robin to arbitrate
// between ready bank queues for use of the address bus.  If you
// wish to add randomness to the system, set this parameter to
// one instead, and it will restart the round-robin pointer at a
// random bank number each cycle.  If you want additional
// nondeterminism, set the parameter to some integer n >= 2, and
// it will in addition add a n% chance each cycle that a ready bank
// will be delayed an additional cycle.  Note that if you are
// in MEM_FIXED_DELAY mode (see below), MEM_RANDOM_ARBITRATE=1 will
// have no effect, but MEM_RANDOM_ARBITRATE=2 or more will.
MEM_RANDOM_ARBITRATE: 0

// The following parameter, if nonzero, will disable the memory
// controller and instead give every request a fixed latency.  The
// nonzero value specified here is measured in memory cycles and is
// just added to MEM_CTL_LATENCY.  It will also show up in the stats
// file as a contributor to memory_delays_stalled_at_head_of_bank_queue.
MEM_FIXED_DELAY: 0

// If instead of DDR-400, you wanted DDR-800, the channel gets faster
// but the basic operation of the DRAM core is unchanged.
// Busy times appear to double just because they are measured
// in smaller clock cycles.  The performance advantage comes because
// the bus busy times don't actually quite double.
// You would use something like these values:
//
// MEM_BUS_CYCLE_MULTIPLIER: 5
// BANK_BUSY_TIME: 22
// RANK_RANK_DELAY: 2
// READ_WRITE_DELAY: 3
// BASIC_BUS_BUSY_TIME: 3
// MEM_CTL_LATENCY: 20
// REFRESH_PERIOD: 3120