src/mem/ruby/system/MemoryControl.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685

/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Description:  This module simulates a basic DDR-style memory controller
 * (and can easily be extended to do FB-DIMM as well).
 *
 * This module models a single channel, connected to any number of
 * DIMMs with any number of ranks of DRAMs each.  If you want multiple
 * address/data channels, you need to instantiate multiple copies of
 * this module.
 *
 * Each memory request is placed in a queue associated with a specific
 * memory bank.  This queue is of finite size; if the queue is full
 * the request will back up in an (infinite) common queue and will
 * effectively throttle the whole system.  This sort of behavior is
 * intended to be closer to real system behavior than if we had an
 * infinite queue on each bank.  If you want the latter, just make
 * the bank queues unreasonably large.
 *
 * The head item on a bank queue is issued when all of the
 * following are true:
 *   the bank is available
 *   the address path to the DIMM is available
 *   the data path to or from the DIMM is available
 *
 * Note that we are not concerned about fixed offsets in time.  The bank
 * will not be used at the same moment as the address path, but since
 * there is no queue in the DIMM or the DRAM it will be used at a constant
 * number of cycles later, so it is treated as if it is used at the same
 * time.
 *
 * We are assuming closed bank policy; that is, we automatically close
 * each bank after a single read or write.  Adding an option for open
 * bank policy is for future work.
 *
 * We are assuming "posted CAS"; that is, we send the READ or WRITE
 * immediately after the ACTIVATE.  This makes scheduling the address
 * bus trivial; we always schedule a fixed set of cycles.  For DDR-400,
 * this is a set of two cycles; for some configurations such as
 * DDR-800 the parameter tRRD forces this to be set to three cycles.
 *
 * We assume a four-bit-time transfer on the data wires.  This is
 * the minimum burst length for DDR-2.  This would correspond
 * to (for example) a memory where each DIMM is 72 bits wide
 * and DIMMs are ganged in pairs to deliver 64 bytes at a shot.
 * This gives us the same occupancy on the data wires as on the
 * address wires (for the two-address-cycle case).
 *
 * The only non-trivial scheduling problem is the data wires.
 * A write will use the wires earlier in the operation than a read
 * will; typically one cycle earlier as seen at the DRAM, but earlier
 * by a worst-case round-trip wire delay when seen at the memory controller.
 * So, while reads from one rank can be scheduled back-to-back
 * every two cycles, and writes (to any rank) scheduled every two cycles,
 * when a read is followed by a write we need to insert a bubble.
 * Furthermore, consecutive reads from two different ranks may need
 * to insert a bubble due to skew between when one DRAM stops driving the
 * wires and when the other one starts.  (These bubbles are parameters.)
 *
 * This means that when some number of reads and writes are at the
 * heads of their queues, reads could starve writes, and/or reads
 * to the same rank could starve out other requests, since the others
 * would never see the data bus ready.
 * For this reason, we have implemented an anti-starvation feature.
 * A group of requests is marked "old", and a counter is incremented
 * each cycle as long as any request from that batch has not issued.
 * if the counter reaches twice the bank busy time, we hold off any
 * newer requests until all of the "old" requests have issued.
 *
 * We also model tFAW.  This is an obscure DRAM parameter that says
 * that no more than four activate requests can happen within a window
 * of a certain size.  For most configurations this does not come into play,
 * or has very little effect, but it could be used to throttle the power
 * consumption of the DRAM.  In this implementation (unlike in a DRAM
 * data sheet) TFAW is measured in memory bus cycles; i.e. if TFAW = 16
 * then no more than four activates may happen within any 16 cycle window.
 * Refreshes are included in the activates.
 *
 */

#include <list>

#include "base/cprintf.hh"
#include "mem/gems_common/Map.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Consumer.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/NetworkMessage.hh"
#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
#include "mem/ruby/system/MemoryControl.hh"
#include "mem/ruby/system/System.hh"

class Consumer;

// Value to reset watchdog timer to.
// If we're idle for this many memory control cycles,
// shut down our clock (our rescheduling of ourselves).
// Refresh shuts down as well.
// When we restart, we'll be in a different phase
// with respect to ruby cycles, so this introduces
// a slight inaccuracy.  But it is necessary or the
// ruby tester never terminates because the event
// queue is never empty.
#define IDLECOUNT_MAX_VALUE 1000

// Output operator definition

ostream&
operator<<(ostream& out, const MemoryControl& obj)
{
    obj.print(out);
    out << flush;
    return out;
}


// ****************************************************************

// CONSTRUCTOR
MemoryControl::MemoryControl(const Params *p)
    : SimObject(p)
{
    m_mem_bus_cycle_multiplier = p->mem_bus_cycle_multiplier;
    m_banks_per_rank = p->banks_per_rank;
    m_ranks_per_dimm = p->ranks_per_dimm;
    m_dimms_per_channel = p->dimms_per_channel;
    m_bank_bit_0 = p->bank_bit_0;
    m_rank_bit_0 = p->rank_bit_0;
    m_dimm_bit_0 = p->dimm_bit_0;
    m_bank_queue_size = p->bank_queue_size;
    m_bank_busy_time = p->bank_busy_time;
    m_rank_rank_delay = p->rank_rank_delay;
    m_read_write_delay = p->read_write_delay;
    m_basic_bus_busy_time = p->basic_bus_busy_time;
    m_mem_ctl_latency = p->mem_ctl_latency;
    m_refresh_period = p->refresh_period;
    m_tFaw = p->tFaw;
    m_mem_random_arbitrate = p->mem_random_arbitrate;
    m_mem_fixed_delay = p->mem_fixed_delay;

    m_profiler_ptr = new MemCntrlProfiler(name(),
                                          m_banks_per_rank,
                                          m_ranks_per_dimm,
                                          m_dimms_per_channel);
}

void
MemoryControl::init()
{
    m_msg_counter = 0;

    m_debug = 0;

    assert(m_tFaw <= 62); // must fit in a uint64 shift register

    m_total_banks = m_banks_per_rank * m_ranks_per_dimm * m_dimms_per_channel;
    m_total_ranks = m_ranks_per_dimm * m_dimms_per_channel;
    m_refresh_period_system = m_refresh_period / m_total_banks;

    m_bankQueues = new list<MemoryNode> [m_total_banks];
    assert(m_bankQueues);

    m_bankBusyCounter = new int [m_total_banks];
    assert(m_bankBusyCounter);

    m_oldRequest = new int [m_total_banks];
    assert(m_oldRequest);

    for (int i = 0; i < m_total_banks; i++) {
        m_bankBusyCounter[i] = 0;
        m_oldRequest[i] = 0;
    }

    m_busBusyCounter_Basic = 0;
    m_busBusyCounter_Write = 0;
    m_busBusyCounter_ReadNewRank = 0;
    m_busBusy_WhichRank = 0;

    m_roundRobin = 0;
    m_refresh_count = 1;
    m_need_refresh = 0;
    m_refresh_bank = 0;
    m_awakened = 0;
    m_idleCount = 0;
    m_ageCounter = 0;

    // Each tfaw shift register keeps a moving bit pattern
    // which shows when recent activates have occurred.
    // m_tfaw_count keeps track of how many 1 bits are set
    // in each shift register.  When m_tfaw_count is >= 4,
    // new activates are not allowed.
    m_tfaw_shift = new uint64[m_total_ranks];
    m_tfaw_count = new int[m_total_ranks];
    for (int i = 0; i < m_total_ranks; i++) {
        m_tfaw_shift[i] = 0;
        m_tfaw_count[i] = 0;
    }
}

MemoryControl::~MemoryControl()
{
    delete [] m_bankQueues;
    delete [] m_bankBusyCounter;
    delete [] m_oldRequest;
    delete m_profiler_ptr;
}

// enqueue new request from directory
void
MemoryControl::enqueue(const MsgPtr& message, int latency)
{
    Time current_time = g_eventQueue_ptr->getTime();
    Time arrival_time = current_time + latency;
    const MemoryMsg* memMess = dynamic_cast<const MemoryMsg*>(message.ref());
    physical_address_t addr = memMess->getAddress().getAddress();
    MemoryRequestType type = memMess->getType();
    bool is_mem_read = (type == MemoryRequestType_MEMORY_READ);
    MemoryNode thisReq(arrival_time, message, addr, is_mem_read, !is_mem_read);
    enqueueMemRef(thisReq);
}

// Alternate entry point used when we already have a MemoryNode
// structure built.
void
MemoryControl::enqueueMemRef(MemoryNode& memRef)
{
    m_msg_counter++;
    memRef.m_msg_counter = m_msg_counter;
    Time arrival_time = memRef.m_time;
    uint64 at = arrival_time;
    bool is_mem_read = memRef.m_is_mem_read;
    physical_address_t addr = memRef.m_addr;
    int bank = getBank(addr);
    if (m_debug) {
        cprintf("New memory request%7d: %#08x %c arrived at %10d bank = %3x\n",
                m_msg_counter, addr, is_mem_read? 'R':'W', at, bank);
    }

    m_profiler_ptr->profileMemReq(bank);
    m_input_queue.push_back(memRef);
    if (!m_awakened) {
        g_eventQueue_ptr->scheduleEvent(this, 1);
        m_awakened = 1;
    }
}

// dequeue, peek, and isReady are used to transfer completed requests
// back to the directory
void
MemoryControl::dequeue()
{
    assert(isReady());
    m_response_queue.pop_front();
}

const Message*
MemoryControl::peek()
{
    MemoryNode node = peekNode();
    Message* msg_ptr = node.m_msgptr.ref();
    assert(msg_ptr != NULL);
    return msg_ptr;
}

MemoryNode
MemoryControl::peekNode()
{
    assert(isReady());
    MemoryNode req = m_response_queue.front();
    uint64 returnTime = req.m_time;
    if (m_debug) {
        cprintf("Old memory request%7d: %#08x %c peeked at  %10d\n",
                req.m_msg_counter, req.m_addr, req.m_is_mem_read ? 'R':'W',
                returnTime);
    }
    return req;
}

bool
MemoryControl::isReady()
{
    return ((!m_response_queue.empty()) &&
            (m_response_queue.front().m_time <= g_eventQueue_ptr->getTime()));
}

void
MemoryControl::setConsumer(Consumer* consumer_ptr)
{
    m_consumer_ptr = consumer_ptr;
}

void
MemoryControl::print(ostream& out) const
{
}

void
MemoryControl::printConfig(ostream& out)
{
    out << "Memory Control " << name() << ":" << endl;
    out << "  Ruby cycles per memory cycle: " << m_mem_bus_cycle_multiplier
        << endl;
    out << "  Basic read latency: " << m_mem_ctl_latency << endl;
    if (m_mem_fixed_delay) {
        out << "  Fixed Latency mode:  Added cycles = " << m_mem_fixed_delay
            << endl;
    } else {
        out << "  Bank busy time: " << m_bank_busy_time << " memory cycles"
            << endl;
        out << "  Memory channel busy time: " << m_basic_bus_busy_time << endl;
        out << "  Dead cycles between reads to different ranks: "
            << m_rank_rank_delay << endl;
        out << "  Dead cycle between a read and a write: "
            << m_read_write_delay << endl;
        out << "  tFaw (four-activate) window: " << m_tFaw << endl;
    }
    out << "  Banks per rank: " << m_banks_per_rank << endl;
    out << "  Ranks per DIMM: " << m_ranks_per_dimm << endl;
    out << "  DIMMs per channel:  " << m_dimms_per_channel << endl;
    out << "  LSB of bank field in address: " << m_bank_bit_0 << endl;
    out << "  LSB of rank field in address: " << m_rank_bit_0 << endl;
    out << "  LSB of DIMM field in address: " << m_dimm_bit_0 << endl;
    out << "  Max size of each bank queue: " << m_bank_queue_size << endl;
    out << "  Refresh period (within one bank): " << m_refresh_period << endl;
    out << "  Arbitration randomness: " << m_mem_random_arbitrate << endl;
}

void
MemoryControl::setDebug(int debugFlag)
{
    m_debug = debugFlag;
}

void
MemoryControl::clearStats() const
{
    m_profiler_ptr->clearStats();
}

void
MemoryControl::printStats(ostream& out) const
{
    m_profiler_ptr->printStats(out);
}

// Queue up a completed request to send back to directory
void
MemoryControl::enqueueToDirectory(MemoryNode req, int latency)
{
    Time arrival_time = g_eventQueue_ptr->getTime()
        + (latency * m_mem_bus_cycle_multiplier);
    req.m_time = arrival_time;
    m_response_queue.push_back(req);

    // schedule the wake up
    g_eventQueue_ptr->scheduleEventAbsolute(m_consumer_ptr, arrival_time);
}

// getBank returns an integer that is unique for each
// bank across this memory controller.
int
MemoryControl::getBank(physical_address_t addr)
{
    int dimm = (addr >> m_dimm_bit_0) & (m_dimms_per_channel - 1);
    int rank = (addr >> m_rank_bit_0) & (m_ranks_per_dimm - 1);
    int bank = (addr >> m_bank_bit_0) & (m_banks_per_rank - 1);
    return (dimm * m_ranks_per_dimm * m_banks_per_rank)
        + (rank * m_banks_per_rank)
        + bank;
}

// getRank returns an integer that is unique for each rank
// and independent of individual bank.
int
MemoryControl::getRank(int bank)
{
    int rank = (bank / m_banks_per_rank);
    assert (rank < (m_ranks_per_dimm * m_dimms_per_channel));
    return rank;
}

// queueReady determines if the head item in a bank queue
// can be issued this cycle
bool
MemoryControl::queueReady(int bank)
{
    if ((m_bankBusyCounter[bank] > 0) && !m_mem_fixed_delay) {
        m_profiler_ptr->profileMemBankBusy();
#if 0
        if (m_debug)
            printf("  bank %x busy %d\n", bank, m_bankBusyCounter[bank]);
#endif
        return false;
    }

    if (m_mem_random_arbitrate >= 2) {
        if ((random() % 100) < m_mem_random_arbitrate) {
            m_profiler_ptr->profileMemRandBusy();
            return false;
        }
    }

    if (m_mem_fixed_delay)
        return true;

    if ((m_ageCounter > (2 * m_bank_busy_time)) && !m_oldRequest[bank]) {
        m_profiler_ptr->profileMemNotOld();
        return false;
    }

    if (m_busBusyCounter_Basic == m_basic_bus_busy_time) {
        // Another bank must have issued this same cycle.  For
        // profiling, we count this as an arb wait rather than a bus
        // wait.  This is a little inaccurate since it MIGHT have also
        // been blocked waiting for a read-write or a read-read
        // instead, but it's pretty close.
        m_profiler_ptr->profileMemArbWait(1);
        return false;
    }

    if (m_busBusyCounter_Basic > 0) {
        m_profiler_ptr->profileMemBusBusy();
        return false;
    }

    int rank = getRank(bank);
    if (m_tfaw_count[rank] >= ACTIVATE_PER_TFAW) {
        m_profiler_ptr->profileMemTfawBusy();
        return false;
    }

    bool write = !m_bankQueues[bank].front().m_is_mem_read;
    if (write && (m_busBusyCounter_Write > 0)) {
        m_profiler_ptr->profileMemReadWriteBusy();
        return false;
    }

    if (!write && (rank != m_busBusy_WhichRank)
        && (m_busBusyCounter_ReadNewRank > 0)) {
        m_profiler_ptr->profileMemDataBusBusy();
        return false;
    }

    return true;
}

// issueRefresh checks to see if this bank has a refresh scheduled
// and, if so, does the refresh and returns true
bool
MemoryControl::issueRefresh(int bank)
{
    if (!m_need_refresh || (m_refresh_bank != bank))
        return false;
    if (m_bankBusyCounter[bank] > 0)
        return false;
    // Note that m_busBusyCounter will prevent multiple issues during
    // the same cycle, as well as on different but close cycles:
    if (m_busBusyCounter_Basic > 0)
        return false;
    int rank = getRank(bank);
    if (m_tfaw_count[rank] >= ACTIVATE_PER_TFAW)
        return false;

    // Issue it:
#if 0
    if (m_debug) {
        uint64 current_time = g_eventQueue_ptr->getTime();
        printf("    Refresh bank %3x at %lld\n", bank, current_time);
    }
#endif

    m_profiler_ptr->profileMemRefresh();
    m_need_refresh--;
    m_refresh_bank++;
    if (m_refresh_bank >= m_total_banks)
        m_refresh_bank = 0;
    m_bankBusyCounter[bank] = m_bank_busy_time;
    m_busBusyCounter_Basic = m_basic_bus_busy_time;
    m_busBusyCounter_Write = m_basic_bus_busy_time;
    m_busBusyCounter_ReadNewRank = m_basic_bus_busy_time;
    markTfaw(rank);
    return true;
}

// Mark the activate in the tFaw shift register
void
MemoryControl::markTfaw(int rank)
{
    if (m_tFaw) {
        m_tfaw_shift[rank] |= (1 << (m_tFaw-1));
        m_tfaw_count[rank]++;
    }
}

// Issue a memory request: Activate the bank, reserve the address and
// data buses, and queue the request for return to the requesting
// processor after a fixed latency.
void
MemoryControl::issueRequest(int bank)
{
    int rank = getRank(bank);
    MemoryNode req = m_bankQueues[bank].front();
    m_bankQueues[bank].pop_front();
    if (m_debug) {
        uint64 current_time = g_eventQueue_ptr->getTime();
        cprintf("    Mem issue request%7d: %#08x %c         at %10d  "
                "bank=%3x\n",
                req.m_msg_counter, req.m_addr, req.m_is_mem_read? 'R':'W',
                current_time, bank);
    }
    if (req.m_msgptr.ref() != NULL) {  // don't enqueue L3 writebacks
        enqueueToDirectory(req, m_mem_ctl_latency + m_mem_fixed_delay);
    }
    m_oldRequest[bank] = 0;
    markTfaw(rank);
    m_bankBusyCounter[bank] = m_bank_busy_time;
    m_busBusy_WhichRank = rank;
    if (req.m_is_mem_read) {
        m_profiler_ptr->profileMemRead();
        m_busBusyCounter_Basic = m_basic_bus_busy_time;
        m_busBusyCounter_Write = m_basic_bus_busy_time + m_read_write_delay;
        m_busBusyCounter_ReadNewRank =
            m_basic_bus_busy_time + m_rank_rank_delay;
    } else {
        m_profiler_ptr->profileMemWrite();
        m_busBusyCounter_Basic = m_basic_bus_busy_time;
        m_busBusyCounter_Write = m_basic_bus_busy_time;
        m_busBusyCounter_ReadNewRank = m_basic_bus_busy_time;
    }
}

// executeCycle:  This function is called once per memory clock cycle
// to simulate all the periodic hardware.
void
MemoryControl::executeCycle()
{
    // Keep track of time by counting down the busy counters:
    for (int bank=0; bank < m_total_banks; bank++) {
        if (m_bankBusyCounter[bank] > 0) m_bankBusyCounter[bank]--;
    }
    if (m_busBusyCounter_Write > 0)
        m_busBusyCounter_Write--;
    if (m_busBusyCounter_ReadNewRank > 0)
        m_busBusyCounter_ReadNewRank--;
    if (m_busBusyCounter_Basic > 0)
        m_busBusyCounter_Basic--;

    // Count down the tFAW shift registers:
    for (int rank=0; rank < m_total_ranks; rank++) {
        if (m_tfaw_shift[rank] & 1) m_tfaw_count[rank]--;
        m_tfaw_shift[rank] >>= 1;
    }

    // After time period expires, latch an indication that we need a refresh.
    // Disable refresh if in mem_fixed_delay mode.
    if (!m_mem_fixed_delay) m_refresh_count--;
    if (m_refresh_count == 0) {
        m_refresh_count = m_refresh_period_system;

        // Are we overrunning our ability to refresh?
        assert(m_need_refresh < 10);
        m_need_refresh++;
    }

    // If this batch of requests is all done, make a new batch:
    m_ageCounter++;
    int anyOld = 0;
    for (int bank=0; bank < m_total_banks; bank++) {
        anyOld |= m_oldRequest[bank];
    }
    if (!anyOld) {
        for (int bank=0; bank < m_total_banks; bank++) {
            if (!m_bankQueues[bank].empty()) m_oldRequest[bank] = 1;
        }
        m_ageCounter = 0;
    }

    // If randomness desired, re-randomize round-robin position each cycle
    if (m_mem_random_arbitrate) {
        m_roundRobin = random() % m_total_banks;
    }

    // For each channel, scan round-robin, and pick an old, ready
    // request and issue it.  Treat a refresh request as if it were at
    // the head of its bank queue.  After we issue something, keep
    // scanning the queues just to gather statistics about how many
    // are waiting.  If in mem_fixed_delay mode, we can issue more
    // than one request per cycle.
    int queueHeads = 0;
    int banksIssued = 0;
    for (int i = 0; i < m_total_banks; i++) {
        m_roundRobin++;
        if (m_roundRobin >= m_total_banks) m_roundRobin = 0;
        issueRefresh(m_roundRobin);
        int qs = m_bankQueues[m_roundRobin].size();
        if (qs > 1) {
            m_profiler_ptr->profileMemBankQ(qs-1);
        }
        if (qs > 0) {
            // we're not idle if anything is queued
            m_idleCount = IDLECOUNT_MAX_VALUE;
            queueHeads++;
            if (queueReady(m_roundRobin)) {
                issueRequest(m_roundRobin);
                banksIssued++;
                if (m_mem_fixed_delay) {
                    m_profiler_ptr->profileMemWaitCycles(m_mem_fixed_delay);
                }
            }
        }
    }

    // memWaitCycles is a redundant catch-all for the specific
    // counters in queueReady
    m_profiler_ptr->profileMemWaitCycles(queueHeads - banksIssued);

    // Check input queue and move anything to bank queues if not full.
    // Since this is done here at the end of the cycle, there will
    // always be at least one cycle of latency in the bank queue.  We
    // deliberately move at most one request per cycle (to simulate
    // typical hardware).  Note that if one bank queue fills up, other
    // requests can get stuck behind it here.
    if (!m_input_queue.empty()) {
        // we're not idle if anything is pending
        m_idleCount = IDLECOUNT_MAX_VALUE;
        MemoryNode req = m_input_queue.front();
        int bank = getBank(req.m_addr);
        if (m_bankQueues[bank].size() < m_bank_queue_size) {
            m_input_queue.pop_front();
            m_bankQueues[bank].push_back(req);
        }
        m_profiler_ptr->profileMemInputQ(m_input_queue.size());
    }
}

// wakeup:  This function is called once per memory controller clock cycle.
void
MemoryControl::wakeup()
{
    // execute everything
    executeCycle();

    m_idleCount--;
    if (m_idleCount <= 0) {
        m_awakened = 0;
    } else {
        // Reschedule ourselves so that we run every memory cycle:
        g_eventQueue_ptr->scheduleEvent(this, m_mem_bus_cycle_multiplier);
    }
}

MemoryControl *
RubyMemoryControlParams::create()
{
    return new MemoryControl(this);
}