summaryrefslogtreecommitdiff
path: root/src/mem/DRAMCtrl.py
blob: f78a7370dab849e9c05fc5ecb5391e68d6ac97c9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
# Copyright (c) 2012-2018 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder.  You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2013 Amin Farmahini-Farahani
# Copyright (c) 2015 University of Kaiserslautern
# Copyright (c) 2015 The University of Bologna
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Andreas Hansson
#          Ani Udipi
#          Omar Naji
#          Matthias Jung
#          Erfan Azarkhish

from m5.params import *
from m5.proxy import *
from AbstractMemory import *

# Memory scheduling algorithms understood by the controller: plain
# First-Come First-Served ('fcfs'), and First-Row Hit then First-Come
# First-Served ('frfcfs')
class MemSched(Enum):
    vals = [
        'fcfs',
        'frfcfs',
    ]

# Address mapping schemes. Each name lists the address fields from MSB
# to LSB, where Ch, Ra, Ba, Ro and Co stand for channel, rank, bank, row
# and column, respectively. RoRaBaChCo and RoRaBaCoCh suit an open-page
# policy, optimising for sequential accesses hitting in the open row.
# RoCoRaBaCh maximises parallelism and is meant for a closed-page
# policy.
class AddrMap(Enum):
    vals = [
        'RoRaBaChCo',
        'RoRaBaCoCh',
        'RoCoRaBaCh',
    ]

# Page management policies: open, open_adaptive, close, or
# close_adaptive.
class PageManage(Enum):
    vals = [
        'open',
        'open_adaptive',
        'close',
        'close_adaptive',
    ]

# DRAMCtrl is a single-channel single-ported DRAM controller model
# that aims to model the most important system-level performance
# effects of a DRAM without getting into too much detail of the DRAM
# itself.
#
# Parameters declared below with only a description string have no
# default in this class; the concrete device classes later in this file
# provide their values.
class DRAMCtrl(AbstractMemory):
    type = 'DRAMCtrl'
    cxx_header = "mem/dram_ctrl.hh"

    # single-ported on the system interface side, instantiate with a
    # bus in front of the controller for multiple ports
    port = SlavePort("Slave port")

    # the basic configuration of the controller architecture, note
    # that each entry corresponds to a burst for the specific DRAM
    # configuration (e.g. x32 with burst length 8 is 32 bytes) and not
    # the cacheline size or request/packet size
    write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
    read_buffer_size = Param.Unsigned(32, "Number of read queue entries")

    # threshold in percent for when to forcefully trigger writes and
    # start emptying the write buffer
    write_high_thresh_perc = Param.Percent(85, "Threshold to force writes")

    # threshold in percentage for when to start writes if the read
    # queue is empty
    write_low_thresh_perc = Param.Percent(50, "Threshold to start writes")

    # minimum write bursts to schedule before switching back to reads
    min_writes_per_switch = Param.Unsigned(16, "Minimum write bursts before "
                                           "switching to reads")

    # scheduler, address map and page policy
    mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy")
    addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
    page_policy = Param.PageManage('open_adaptive', "Page management policy")

    # enforce a limit on the number of accesses per row
    max_accesses_per_row = Param.Unsigned(16, "Max accesses per row before "
                                          "closing")

    # size of DRAM Chip in Bytes
    device_size = Param.MemorySize("Size of DRAM chip")

    # pipeline latency of the controller and PHY, split into a
    # frontend part and a backend part, with reads and writes serviced
    # by the queues only seeing the frontend contribution, and reads
    # serviced by the memory seeing the sum of the two
    static_frontend_latency = Param.Latency("10ns", "Static frontend latency")
    static_backend_latency = Param.Latency("10ns", "Static backend latency")

    # the physical organisation of the DRAM
    device_bus_width = Param.Unsigned("data bus width in bits for each DRAM "
                                      "device/chip")
    burst_length = Param.Unsigned("Burst length (BL) in beats")
    device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "
                                             "device/chip")
    devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
    ranks_per_channel = Param.Unsigned("Number of ranks per channel")

    # default to 0 bank groups per rank, indicating bank group architecture
    # is not used
    # update per memory class when bank group architecture is supported
    bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank")
    banks_per_rank = Param.Unsigned("Number of banks per rank")
    # only used for the address mapping as the controller by
    # construction is a single channel and multiple controllers have
    # to be instantiated for a multi-channel configuration
    channels = Param.Unsigned(1, "Number of channels")

    # For power modelling we need to know if the DRAM has a DLL or not
    dll = Param.Bool(True, "DRAM has DLL or not")

    # DRAMPower provides in addition to the core power, the possibility to
    # include RD/WR termination and IO power. This calculation assumes some
    # default values. The integration of DRAMPower with gem5 does not include
    # IO and RD/WR termination power by default. This might be added as an
    # additional feature in the future.

    # timing behaviour and constraints - all in nanoseconds

    # the base clock period of the DRAM
    tCK = Param.Latency("Clock period")

    # the amount of time in nanoseconds from issuing an activate command
    # to the data being available in the row buffer for a read/write
    tRCD = Param.Latency("RAS to CAS delay")

    # the time from issuing a read/write command to seeing the actual data
    tCL = Param.Latency("CAS latency")

    # minimum time between a precharge and subsequent activate
    tRP = Param.Latency("Row precharge time")

    # minimum time between an activate and a precharge to the same row
    tRAS = Param.Latency("ACT to PRE delay")

    # minimum time between a write data transfer and a precharge
    tWR = Param.Latency("Write recovery time")

    # minimum time between a read and precharge command
    tRTP = Param.Latency("Read to precharge")

    # time to complete a burst transfer, typically the burst length
    # divided by two due to the DDR bus, but by making it a parameter
    # it is easier to also evaluate SDR memories like WideIO.
    # This parameter has to account for burst length.
    # Read/Write requests with data size larger than one full burst are broken
    # down into multiple requests in the controller
    # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
    # With bank group architectures, tBURST represents the CAS-to-CAS
    # delay for bursts to different bank groups (tCCD_S)
    tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)")

    # CAS-to-CAS delay for bursts to the same bank group
    # only utilized with bank group architectures; set to 0 for default case
    # tBURST is equivalent to tCCD_S; no explicit parameter required
    # for CAS-to-CAS delay for bursts to different bank groups
    tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")

    # Write-to-Write delay for bursts to the same bank group
    # only utilized with bank group architectures; set to 0 for default case
    # This will be used to enable different same bank group delays
    # for writes versus reads
    # the default is a proxy that follows this object's own tCCD_L
    tCCD_L_WR = Param.Latency(Self.tCCD_L,
        "Same bank group Write to Write delay")

    # time taken to complete one refresh cycle (N rows in all banks)
    tRFC = Param.Latency("Refresh cycle time")

    # refresh command interval, how often a "ref" command needs
    # to be sent. It is 7.8 us for a 64ms refresh requirement
    tREFI = Param.Latency("Refresh command interval")

    # write-to-read, same rank turnaround penalty
    tWTR = Param.Latency("Write to read, same rank switching time")

    # read-to-write, same rank turnaround penalty
    tRTW = Param.Latency("Read to write, same rank switching time")

    # rank-to-rank bus delay penalty
    # this does not correlate to a memory timing parameter and encompasses:
    # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
    # different rank bus delay
    tCS = Param.Latency("Rank to rank switching time")

    # minimum row activate to row activate delay time
    tRRD = Param.Latency("ACT to ACT delay")

    # only utilized with bank group architectures; set to 0 for default case
    tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")

    # time window in which a maximum number of activates are allowed
    # to take place, set to 0 to disable
    tXAW = Param.Latency("X activation window")
    activation_limit = Param.Unsigned("Max number of activates in window")

    # time to exit power-down mode
    # Exit power-down to next valid command delay
    tXP = Param.Latency("0ns", "Power-up Delay")

    # Exit Powerdown to commands requiring a locked DLL
    tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")

    # time to exit self-refresh mode
    tXS = Param.Latency("0ns", "Self-refresh exit latency")

    # time to exit self-refresh mode with locked DLL
    tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")

    # Currently rolled into other params
    ######################################################################

    # tRC  - assumed to be tRAS + tRP

    # Power Behaviour and Constraints
    # DRAMs like LPDDR and WideIO have 2 external voltage domains. These are
    # defined as VDD and VDD2. Each current is defined for each voltage domain
    # separately. For example, current IDD0 is active-precharge current for
    # voltage domain VDD and current IDD02 is active-precharge current for
    # voltage domain VDD2.
    # By default all currents are set to 0mA. Users who are only interested in
    # the performance of DRAMs can leave them at 0.

    # Operating 1 Bank Active-Precharge current
    IDD0 = Param.Current("0mA", "Active precharge current")

    # Operating 1 Bank Active-Precharge current multiple voltage Range
    IDD02 = Param.Current("0mA", "Active precharge current VDD2")

    # Precharge Power-down Current: Slow exit
    IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")

    # Precharge Power-down Current: Slow exit multiple voltage Range
    IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")

    # Precharge Power-down Current: Fast exit
    IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")

    # Precharge Power-down Current: Fast exit multiple voltage Range
    IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")

    # Precharge Standby current
    IDD2N = Param.Current("0mA", "Precharge Standby current")

    # Precharge Standby current multiple voltage range
    IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")

    # Active Power-down current: slow exit
    IDD3P0 = Param.Current("0mA", "Active Powerdown slow")

    # Active Power-down current: slow exit multiple voltage range
    IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")

    # Active Power-down current : fast exit
    IDD3P1 = Param.Current("0mA", "Active Powerdown fast")

    # Active Power-down current : fast exit multiple voltage range
    IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")

    # Active Standby current
    IDD3N = Param.Current("0mA", "Active Standby current")

    # Active Standby current multiple voltage range
    IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")

    # Burst Read Operating Current
    IDD4R = Param.Current("0mA", "READ current")

    # Burst Read Operating Current multiple voltage range
    IDD4R2 = Param.Current("0mA", "READ current VDD2")

    # Burst Write Operating Current
    IDD4W = Param.Current("0mA", "WRITE current")

    # Burst Write Operating Current multiple voltage range
    IDD4W2 = Param.Current("0mA", "WRITE current VDD2")

    # Refresh Current
    IDD5 = Param.Current("0mA", "Refresh current")

    # Refresh Current multiple voltage range
    IDD52 = Param.Current("0mA", "Refresh current VDD2")

    # Self-Refresh Current
    IDD6 = Param.Current("0mA", "Self-refresh Current")

    # Self-Refresh Current multiple voltage range
    IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")

    # Main voltage range of the DRAM
    VDD = Param.Voltage("0V", "Main Voltage Range")

    # Second voltage range defined by some DRAMs
    VDD2 = Param.Voltage("0V", "2nd Voltage Range")

# DDR3-1600 in an 8x8 arrangement, forming a single x64 channel with one
# command and address bus. Timings follow a DDR3-1600 4 Gbit datasheet
# (Micron MT41J512M8).
class DDR3_1600_8x8(DRAMCtrl):
    # ---- device organisation ----

    # capacity of a single device, in bytes
    device_size = '512MB'

    # 8x8 configuration: 8 devices per rank, each with an 8-bit interface
    devices_per_rank = 8
    device_bus_width = 8

    # page (row buffer) of 1 Kbyte per device (1K columns x8)
    device_rowbuffer_size = '1kB'

    # DDR3 is a BL8 device with 8 banks in all configurations
    burst_length = 8
    banks_per_rank = 8

    # two ranks on the channel
    ranks_per_channel = 2

    # ---- clock and data burst ----

    # 800 MHz
    tCK = '1.25ns'

    # 8 beats across the x64 interface take 4 clocks @ 800 MHz
    tBURST = '5ns'

    # ---- core timings ----

    # DDR3-1600 11-11-11
    tRCD = tCL = tRP = '13.75ns'
    tRAS = '35ns'
    tWR = '15ns'

    # each is the greater of 4 CK or 7.5 ns
    tWTR = '7.5ns'
    tRTP = '7.5ns'

    # activate spacing, and the window limiting the number of activates
    tRRD = '6ns'
    tXAW = '30ns'
    activation_limit = 4

    # ---- bus turnaround ----

    # same rank rd-to-wr turnaround and different rank bus delay both
    # default to 2 CK, @800 MHz = 2.5 ns
    tRTW = '2.5ns'
    tCS = '2.5ns'

    # ---- refresh ----

    tRFC = '260ns'

    # <=85C, half for >85C
    tREFI = '7.8us'

    # ---- low-power state exit ----

    # active powerdown and precharge powerdown exit time
    tXP = '6ns'

    # self refresh exit time
    tXS = '270ns'

    # ---- power ----

    # current values from datasheet Die Rev E,J
    VDD = '1.5V'
    IDD0 = '55mA'
    IDD2N = '32mA'
    IDD2P1 = '32mA'
    IDD3N = '38mA'
    IDD3P1 = '38mA'
    IDD4R = '157mA'
    IDD4W = '125mA'
    IDD5 = '235mA'
    IDD6 = '20mA'

# A single HMC-2500 x32 model based on:
# [1] DRAMSpec: a high-level DRAM bank modelling tool
# developed at the University of Kaiserslautern. This high level tool
# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
# estimate the DRAM bank latency and power numbers.
# [2] High performance AXI-4.0 based interconnect for extensible smart memory
# cubes (E. Azarkhish et al.)
# Assumed for the HMC model is a 30 nm technology node.
# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory (4
# layers).
# Each layer has 16 vaults and each vault consists of 2 banks per layer.
# NOTE(review): the sizes quoted in this file do not obviously add up -
# 2 banks x 4MB (device_size below) x 4 layers x 16 vaults is 512MB, not
# 2GB - confirm the intended per-bank size against [2].
# In order to be able to use the same controller used for 2D DRAM generations
# for HMC, the following analogy is done:
# Channel (DDR) => Vault (HMC)
# device_size (DDR) => size of a single layer in a vault
# ranks per channel (DDR) => number of layers
# banks per rank (DDR) => banks per layer
# devices per rank (DDR) => devices per layer ( 1 for HMC).
# The parameters for which no input is available are inherited from the DDR3
# configuration.
# This configuration includes the latencies from the DRAM to the logic layer
# of the HMC
class HMC_2500_1x32(DDR3_1600_8x8):
    # size of device
    # two banks per device with each bank 4MB [2]
    device_size = '8MB'

    # 1x32 configuration, 1 device with 32 TSVs [2]
    device_bus_width = 32

    # HMC is a BL8 device [2]
    burst_length = 8

    # Each device has a page (row buffer) size of 256 bytes [2]
    device_rowbuffer_size = '256B'

    # 1x32 configuration, so 1 device [2]
    devices_per_rank = 1

    # 4 layers so 4 ranks [2]
    ranks_per_channel = 4

    # HMC has 2 banks per layer [2]
    # Each layer represents a rank. With 4 layers and 8 banks in total, each
    # layer has 2 banks; thus 2 banks per rank.
    banks_per_rank = 2

    # 1250 MHz [2]
    tCK = '0.8ns'

    # 8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
    tBURST = '3.2ns'

    # Values using DRAMSpec HMC model [1]
    tRCD = '10.2ns'
    tCL = '9.9ns'
    tRP = '7.7ns'
    tRAS = '21.6ns'

    # tRRD depends on the power supply network for each vendor.
    # We assume a tRRD of a double bank approach to be equal to 4 clock
    # cycles (Assumption)
    tRRD = '3.2ns'

    # activation limit is set to 0 since there are only 2 banks per vault
    # layer.
    activation_limit = 0

    # Values using DRAMSpec HMC model [1]
    tRFC = '59ns'
    tWR = '8ns'
    tRTP = '4.9ns'

    # Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz =
    # 0.8 ns (Assumption)
    tCS = '0.8ns'

    # Value using DRAMSpec HMC model [1]
    tREFI = '3.9us'

    # The default page policy in the vault controllers is simple closed page
    # [2]. Nevertheless, the 'close' policy opens and closes the row multiple
    # times for bursts larger than 32 bytes. For this reason we use
    # 'close_adaptive'
    page_policy = 'close_adaptive'

    # RoCoRaBaCh resembles the default address mapping in HMC
    addr_mapping = 'RoCoRaBaCh'
    min_writes_per_switch = 8

    # These parameters do not directly correlate with buffer_size in real
    # hardware. Nevertheless, their value has been tuned to achieve a
    # bandwidth similar to the cycle-accurate model in [2]
    write_buffer_size = 32
    read_buffer_size = 32

    # The static latency of the vault controllers is estimated to be smaller
    # than a full DRAM channel controller
    static_backend_latency = '4ns'
    static_frontend_latency = '4ns'

# DDR3-2133 x64 channel. Only a selected subset of the DDR3-1600
# configuration is refined here; everything else is inherited, keeping
# most parameters consistent across the two configurations. Based on the
# same 4 Gbit datasheet (Micron MT41J512M8) as the DDR3-1600 class.
class DDR3_2133_8x8(DDR3_1600_8x8):
    # ---- clock and data burst ----

    # 1066 MHz
    tCK = '0.938ns'

    # 8 beats across the x64 interface take 4 clocks @ 1066 MHz
    tBURST = '3.752ns'

    # ---- core timings ----

    # DDR3-2133 14-14-14
    tRCD = tCL = tRP = '13.09ns'
    tRAS = '33ns'
    tRRD = '5ns'
    tXAW = '25ns'

    # ---- power ----

    # current values from datasheet
    VDD = '1.5V'
    IDD0 = '70mA'
    IDD2N = '37mA'
    IDD2P1 = '43mA'
    IDD3N = '44mA'
    IDD3P1 = '44mA'
    IDD4R = '191mA'
    IDD4W = '157mA'
    IDD5 = '250mA'
    IDD6 = '20mA'

# A single DDR4-2400 x64 channel (one command and address bus), with
# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
# in a 16x4 configuration.
# Total channel capacity is 32GB
# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
class DDR4_2400_16x4(DRAMCtrl):
    # size of device
    device_size = '1GB'

    # 16x4 configuration, 16 devices each with a 4-bit interface
    device_bus_width = 4

    # DDR4 is a BL8 device
    burst_length = 8

    # Each device has a page (row buffer) size of 512 byte (1K columns x4)
    device_rowbuffer_size = '512B'

    # 16x4 configuration, so 16 devices
    devices_per_rank = 16

    # Match our DDR3 configurations which is dual rank
    ranks_per_channel = 2

    # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
    # Set to 4 for x4 case
    bank_groups_per_rank = 4

    # DDR4 has 16 banks (x4, x8) or 8 banks (x16); this is the x4 case.
    # The same bank group delays are configured through tCCD_L and
    # tRRD_L below.
    banks_per_rank = 16

    # override the default buffer sizes and go for something larger to
    # accommodate the larger bank count
    write_buffer_size = 128
    read_buffer_size = 64

    # 1200 MHz
    tCK = '0.833ns'

    # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
    # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
    # With bank group architectures, tBURST represents the CAS-to-CAS
    # delay for bursts to different bank groups (tCCD_S)
    tBURST = '3.332ns'

    # @2400 data rate, tCCD_L is 6 CK
    # CAS-to-CAS delay for bursts to the same bank group
    # tBURST is equivalent to tCCD_S; no explicit parameter required
    # for CAS-to-CAS delay for bursts to different bank groups
    tCCD_L = '5ns'

    # DDR4-2400 17-17-17
    tRCD = '14.16ns'
    tCL = '14.16ns'
    tRP = '14.16ns'
    tRAS = '32ns'

    # RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
    tRRD = '3.332ns'

    # RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
    tRRD_L = '4.9ns'

    # tFAW for 512B page is MAX(16 CK, 13ns)
    tXAW = '13.328ns'
    activation_limit = 4
    # tRFC is 350ns
    tRFC = '350ns'

    tWR = '15ns'

    # Here using the average of WTR_S and WTR_L
    tWTR = '5ns'

    # Greater of 4 CK or 7.5 ns
    tRTP = '7.5ns'

    # Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666 ns
    tRTW = '1.666ns'

    # Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
    tCS = '1.666ns'

    # <=85C, half for >85C
    tREFI = '7.8us'

    # active powerdown and precharge powerdown exit time
    tXP = '6ns'

    # self refresh exit time
    # exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is
    # described as tRFC + 10ns
    # NOTE(review): with tRFC = 350ns above, tRFC + 10ns would be 360ns,
    # not the 340ns configured here - confirm the intended value against
    # the datasheet before changing it
    tXS = '340ns'

    # Current values from datasheet
    IDD0 = '43mA'
    IDD02 = '3mA'
    IDD2N = '34mA'
    IDD3N = '38mA'
    IDD3N2 = '3mA'
    IDD4W = '103mA'
    IDD4R = '110mA'
    IDD5 = '250mA'
    IDD3P1 = '32mA'
    IDD2P1 = '25mA'
    IDD6 = '30mA'
    VDD = '1.2V'
    VDD2 = '2.5V'

# A single DDR4-2400 x64 channel (one command and address bus), with
# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
# in an 8x8 configuration.
# Total channel capacity is 16GB
# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
# Only the parameters that differ from the 16x4 configuration are
# overridden; everything else is inherited from DDR4_2400_16x4.
class DDR4_2400_8x8(DDR4_2400_16x4):
    # 8x8 configuration, 8 devices each with an 8-bit interface
    device_bus_width = 8

    # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
    device_rowbuffer_size = '1kB'

    # 8x8 configuration, so 8 devices
    devices_per_rank = 8

    # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
    tRRD_L = '4.9ns'

    # activation window for this configuration
    tXAW = '21ns'

    # Current values from datasheet
    IDD0 = '48mA'
    IDD3N = '43mA'
    IDD4W = '123mA'
    IDD4R = '135mA'
    IDD3P1 = '37mA'

# A single DDR4-2400 x64 channel (one command and address bus), with
# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
# in a 4x16 configuration.
# Total channel capacity is 4GB
# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
# Only the parameters that differ from the 16x4 configuration are
# overridden; everything else is inherited from DDR4_2400_16x4.
class DDR4_2400_4x16(DDR4_2400_16x4):
    # 4x16 configuration, 4 devices each with a 16-bit interface
    device_bus_width = 16

    # Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
    device_rowbuffer_size = '2kB'

    # 4x16 configuration, so 4 devices
    devices_per_rank = 4

    # Single rank for x16
    ranks_per_channel = 1

    # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
    # Set to 2 for x16 case
    bank_groups_per_rank = 2

    # DDR4 has 16 banks (x4, x8) or 8 banks (x16); this is the x16 case.
    # The same bank group activate delay is configured through tRRD_L
    # below.
    banks_per_rank = 8

    # RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
    tRRD = '5.3ns'

    # RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
    tRRD_L = '6.4ns'

    # activation window for this configuration
    tXAW = '30ns'

    # Current values from datasheet
    IDD0 = '80mA'
    IDD02 = '4mA'
    IDD2N = '34mA'
    IDD3N = '47mA'
    IDD4W = '228mA'
    IDD4R = '243mA'
    IDD5 = '280mA'
    IDD3P1 = '41mA'

# LPDDR2-S4 in a 1x32 arrangement: a single x32 interface with one
# command/address bus. Default timings follow a LPDDR2-1066 4 Gbit part
# (Micron MT42L128M32D1).
class LPDDR2_S4_1066_1x32(DRAMCtrl):
    # LPDDR2 has no DLL
    dll = False

    # ---- device organisation ----

    # capacity of the device
    device_size = '512MB'

    # 1x32 configuration: a single device with a 32-bit interface
    devices_per_rank = 1
    device_bus_width = 32

    # page (row buffer) of 1KB per device; this depends on the memory
    # density
    device_rowbuffer_size = '1kB'

    # LPDDR2_S4 is a BL4 and BL8 device; BL8 is configured
    burst_length = 8

    # LPDDR2-S4 has 8 banks in all configurations
    banks_per_rank = 8

    # a single rank
    ranks_per_channel = 1

    # ---- clock and data burst ----

    # 533 MHz
    tCK = '1.876ns'

    # 8 beats across an x32 DDR interface take 4 clocks @ 533 MHz; this
    # is a BL8 DDR device. Requests larger than 32 bytes are broken down
    # into multiple requests in the controller
    tBURST = '7.5ns'

    # ---- core timings ----

    # fixed at 15 ns
    tRCD = '15ns'

    # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
    tCL = '15ns'

    # pre-charge of one bank is 15 ns (all banks 18 ns)
    tRP = '15ns'

    tRAS = '42ns'
    tWR = '15ns'
    tRTP = '7.5ns'

    # tWTR is 7.5 ns irrespective of speed grade
    tWTR = '7.5ns'

    # activate to activate, irrespective of density and speed grade
    tRRD = '10.0ns'

    # tFAW is 50 ns irrespective of density
    tXAW = '50ns'
    activation_limit = 4

    # ---- bus turnaround ----

    # same rank rd-to-wr turnaround and different rank bus delay both
    # default to 2 CK, @533 MHz = 3.75 ns
    tRTW = '3.75ns'
    tCS = '3.75ns'

    # ---- refresh ----

    # LPDDR2-S4, 4 Gbit
    tRFC = '130ns'
    tREFI = '3.9us'

    # ---- low-power state exit ----

    # active powerdown and precharge powerdown exit time
    tXP = '7.5ns'

    # self refresh exit time
    tXS = '140ns'

    # ---- power ----

    # current values from datasheet; the names ending in an extra 2
    # belong to the second voltage domain (VDD2)
    VDD = '1.8V'
    VDD2 = '1.2V'

    IDD0 = '15mA'
    IDD02 = '70mA'

    IDD2N = '2mA'
    IDD2N2 = '30mA'

    IDD2P1 = '0.6mA'
    IDD2P12 = '0.8mA'

    IDD3N = '2.5mA'
    IDD3N2 = '30mA'

    IDD3P1 = '1.2mA'
    IDD3P12 = '8mA'

    IDD4R = '3mA'
    IDD4R2 = '220mA'

    IDD4W = '10mA'
    IDD4W2 = '190mA'

    IDD5 = '40mA'
    IDD52 = '150mA'

    IDD6 = '1mA'
    IDD62 = '3.2mA'

# WideIO in a 1x128 arrangement: a single x128 interface with one
# command and address bus. Default timings follow an estimated WIO-200
# 8 Gbit part.
class WideIO_200_1x128(DRAMCtrl):
    # WideIO has no DLL
    dll = False

    # ---- device organisation ----

    # capacity of the device
    device_size = '1024MB'

    # 1x128 configuration: a single device with a 128-bit interface
    devices_per_rank = 1
    device_bus_width = 128

    # page (row buffer) of 4KB per device; this depends on the memory
    # density
    device_rowbuffer_size = '4kB'

    # this is a BL4 device
    burst_length = 4

    # WideIO has 4 banks in all configurations
    banks_per_rank = 4

    # one rank for a one-high die stack
    ranks_per_channel = 1

    # ---- clock and data burst ----

    # 200 MHz
    tCK = '5ns'

    # 4 beats across an x128 SDR interface take 4 clocks @ 200 MHz; this
    # is a BL4 SDR device
    tBURST = '20ns'

    # ---- core timings ----

    # WIO-200
    tRCD = '18ns'
    tCL = '18ns'
    tRP = '18ns'
    tRAS = '42ns'
    tWR = '15ns'

    # read to precharge is the same as the burst
    tRTP = '20ns'

    # greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
    tWTR = '15ns'

    # activate to activate, irrespective of density and speed grade
    tRRD = '10.0ns'

    # two instead of four activation window
    tXAW = '50ns'
    activation_limit = 2

    # ---- bus turnaround ----

    # same rank rd-to-wr turnaround and different rank bus delay both
    # default to 2 CK, @200 MHz = 10 ns
    tRTW = '10ns'
    tCS = '10ns'

    # ---- refresh ----

    # WIO 8 Gb
    tRFC = '210ns'

    # WIO 8 Gb, <=85C, half for >85C
    tREFI = '3.9us'

    # The WideIO specification does not provide current information

# One LPDDR3 x32 interface (a single command/address bus). The default
# timings follow an LPDDR3-1600 4 Gbit part (Micron EDF8132A1MC) used
# in a 1x32 configuration.
class LPDDR3_1600_1x32(DRAMCtrl):
    # LPDDR3 does not use a DLL
    dll = False

    # Organisation: 1x32, i.e. a single device with a 32-bit interface
    devices_per_rank = 1
    device_bus_width = 32

    # Capacity of the device
    device_size = '512MB'

    # 4KB page (row buffer) per device
    device_rowbuffer_size = '4kB'

    # The datasheet technically describes a dual-rank package; a single
    # rank is used so the config stays comparable with the LPDDR2 one
    ranks_per_channel = 1

    # All LPDDR3 configurations have 8 banks
    banks_per_rank = 8

    # BL8 device
    burst_length = 8

    # Clock period for 800 MHz
    tCK = '1.25ns'

    # BL8 on an x32 DDR interface: 8 beats take 4 clocks @ 800 MHz.
    # The controller breaks requests larger than 32 bytes down into
    # multiple requests.
    tBURST = '5ns'

    # Read latency of 12 CK (write latency is 6 CK) with a 1.25 ns
    # cycle time @ 800 MHz
    tCL = '15ns'

    tRCD = '18ns'
    tRAS = '42ns'
    tWR = '15ns'

    # Precharge of one bank takes 18 ns (all banks: 21 ns)
    tRP = '18ns'

    # Read to precharge: the greater of 4 CK (5 ns @ 800 MHz) and 7.5 ns
    tRTP = '7.5ns'

    # Write to read is 7.5 ns irrespective of speed grade
    tWTR = '7.5ns'

    # Same-rank rd-to-wr bus turnaround, default of 2 CK = 2.5 ns
    # @ 800 MHz
    tRTW = '2.5ns'

    # Different-rank bus delay, default of 2 CK = 2.5 ns @ 800 MHz
    tCS = '2.5ns'

    # Refresh values for a 4 Gb LPDDR3 part
    tRFC = '130ns'
    tREFI = '3.9us'

    # Exit time from active powerdown and precharge powerdown
    tXP = '7.5ns'

    # Exit time from self refresh
    tXS = '140ns'

    # Activate to activate, irrespective of density and speed grade
    tRRD = '10.0ns'

    # Four activates per window; tFAW is 50 ns irrespective of size
    activation_limit = 4
    tXAW = '50ns'

    # Supply voltages
    VDD = '1.8V'
    VDD2 = '1.2V'

    # Current values from datasheet
    IDD0 = '8mA'
    IDD2N = '0.8mA'
    IDD3N = '2mA'
    IDD4W = '2mA'
    IDD4R = '2mA'
    IDD5 = '28mA'
    IDD3P1 = '1.4mA'
    IDD2P1 = '0.8mA'
    IDD6 = '0.5mA'

    # Datasheet current values for the parameters carrying a trailing 2
    # (presumably the VDD2 rail - confirm against the DRAMCtrl
    # parameter descriptions)
    IDD02 = '60mA'
    IDD2N2 = '26mA'
    IDD3N2 = '34mA'
    IDD4W2 = '190mA'
    IDD4R2 = '230mA'
    IDD52 = '150mA'
    IDD3P12 = '11mA'
    IDD2P12 = '1.8mA'
    IDD62 = '1.8mA'

# A single GDDR5 x64 interface, with
# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
# H5GQ1H24AFR) in a 2x32 configuration.
class GDDR5_4000_2x32(DRAMCtrl):
    # size of device
    device_size = '128MB'

    # 2x32 configuration, each device has a 32-bit interface
    device_bus_width = 32

    # GDDR5 is a BL8 device
    burst_length = 8

    # Each device has a page (row buffer) size of 2Kbits (256Bytes)
    device_rowbuffer_size = '256B'

    # 2x32 configuration, so 2 devices
    devices_per_rank = 2

    # assume single rank
    ranks_per_channel = 1

    # GDDR5 has 4 bank groups
    bank_groups_per_rank = 4

    # GDDR5 has 16 banks with 4 bank groups
    banks_per_rank = 16

    # 1000 MHz
    tCK = '1ns'

    # 8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
    # Data bus runs @ 2000 MHz => DDR (data runs at 4000 MHz)
    # 8 beats at 4000 MHz = 2 clocks at 1000 MHz
    # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
    # With bank group architectures, tBURST represents the CAS-to-CAS
    # delay for bursts to different bank groups (tCCD_S)
    tBURST = '2ns'

    # With a 1000 MHz clock, tCCD_L is 3 CK
    # CAS-to-CAS delay for bursts to the same bank group
    # tBURST is equivalent to tCCD_S; no explicit parameter required
    # for CAS-to-CAS delay for bursts to different bank groups
    tCCD_L = '3ns'

    tRCD = '12ns'

    # tCL is not directly found in datasheet and assumed equal tRCD
    tCL = '12ns'

    tRP = '12ns'
    tRAS = '28ns'

    # RRD_S (different bank group)
    # RRD_S is 5.5 ns in datasheet.
    # rounded to the next multiple of tCK
    tRRD = '6ns'

    # RRD_L (same bank group)
    # RRD_L is 5.5 ns in datasheet.
    # rounded to the next multiple of tCK
    tRRD_L = '6ns'

    tXAW = '23ns'

    # tXAW < 4 x tRRD (23 ns < 24 ns).
    # Therefore, activation limit is set to 0
    activation_limit = 0

    tRFC = '65ns'
    tWR = '12ns'

    # Here using the average of WTR_S and WTR_L
    tWTR = '5ns'

    # Read-to-Precharge 2 CK
    tRTP = '2ns'

    # Assume 2 cycles
    tRTW = '2ns'

# One HBM x128 interface (a single command and address bus). Default
# timings come from publicly released data ("HBM: Memory Solution for
# High Performance Processors", MemCon, 2014), from IDD measurement
# values, and from extrapolating the data of other classes.
# Architecture values follow the published HBM spec.
# The class describes a 4H stack of 2Gb dies, 1GB of memory in total.
class HBM_1000_4H_1x128(DRAMCtrl):
    # HBM gen1 offers up to 8 physical channels, each 128 bits wide.
    # This configuration describes one such channel and sets its
    # capacity to (full_stack_capacity / 8), based on 2Gb dies.
    # Set the 'channels' parameter to 8 in the system configuration to
    # make use of all 8 channels.

    # Organisation: 1x128, 128-bit interface in legacy mode
    devices_per_rank = 1
    device_bus_width = 128

    # Channel size in bytes: a 4H stack of 2Gb dies holds 1GB, which
    # spread over 8 channels gives 128MB per channel
    device_size = '128MB'

    device_rowbuffer_size = '2kB'

    # There is no CS pin on HBM, so a single rank is used
    ranks_per_channel = 1

    # HBM comes with 8 or 16 banks depending on capacity; 2Gb dies
    # have 8 banks
    banks_per_rank = 8

    # Bank groups may be required depending on frequency, and there are
    # always 4 of them when enabled. Current specifications do not
    # define the minimum frequency for the bank group architecture, so
    # bank_groups_per_rank is set to 0 to disable it until the range
    # is defined.
    bank_groups_per_rank = 0

    # Both BL4 and BL2 (legacy mode only) are supported; default to BL4
    burst_length = 4

    # 500 MHz clock for a 1Gbps DDR data rate
    tCK = '2ns'

    # BL4 with DDR @ 500 MHz gives 4 * 2ns / 2 = 4ns
    tBURST = '4ns'

    # Values from the IDD measurement in the JEDEC spec; the tRP value
    # is also used for tRCD and tCL, similar to other classes
    tRP = tRCD = tCL = '15ns'
    tRAS = '33ns'

    # Refresh values for a 2Gb device from the JEDEC spec
    tRFC = '160ns'
    tREFI = '3.9us'

    # Extrapolated from the LPDDR configs, using ns values to minimize
    # burst length and prefetch differences
    tWR = '18ns'
    tRTP = '7.5ns'
    tWTR = '10ns'

    # Turnaround starts at 2 cycles, similar to other memory classes;
    # it could be more with variations across the stack
    tRTW = '4ns'

    # Set to 0 because this is a single rank device
    tCS = '0ns'

    # The MemCon example gives a tRRD of 4ns with a 2ns tCK
    tRRD = '4ns'

    # The MemCon example gives a tFAW of 30ns with a 2ns tCK
    activation_limit = 4
    tXAW = '30ns'

    # 4 tCK
    tXP = '8ns'

    # Starting point of tRFC + tXP, i.e. 160ns + 8ns = 168ns
    tXS = '168ns'

# A single HBM x64 interface (one command and address bus), with
# default timings based on HBM gen1 and publicly released data.
# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
# Note: This defines a pseudo-channel with a unique controller
# instantiated per pseudo-channel.
# Stay at the same IO rate (1Gbps) to maintain the timing relationship
# with the HBM gen1 class (HBM_1000_4H_1x128) where possible.
class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
    # For HBM gen2 with pseudo-channel mode, configure 2X channels.
    # Configuration defines a single pseudo channel, with the capacity
    # set to (full_stack_capacity / 16) based on 8Gb dies.
    # To use all 16 pseudo channels, set 'channels' parameter to 16 in
    # system configuration

    # 64-bit pseudo-channel interface
    device_bus_width = 64

    # HBM pseudo-channel only supports BL4
    burst_length = 4

    # size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
    # with 16 channels, 256MB per channel
    device_size = '256MB'

    # page size is halved with pseudo-channel; maintaining the same
    # number of rows per pseudo-channel with 2X banks across 2 channels
    device_rowbuffer_size = '1kB'

    # HBM has 8 or 16 banks depending on capacity
    # Starting with 4Gb dies, 16 banks are defined
    banks_per_rank = 16

    # reset tRFC for larger, 8Gb device
    # use HBM1 4Gb value as a starting point
    tRFC = '260ns'

    # Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
    # NOTE(review): the tCK inherited from HBM_1000_4H_1x128 is 2ns, for
    # which 2 CK would be 4 ns; the value is left unchanged - confirm
    # the intended clock.
    tCS = '2ns'
    tREFI = '3.9us'

    # active powerdown and precharge powerdown exit time
    tXP = '10ns'

    # self refresh exit time
    # NOTE(review): this class used to assign tXS = '268ns' earlier in
    # the body and then rebind it here. The later binding wins, so the
    # effective value has always been 65ns; the dead assignment was
    # dropped and the effective value kept. tRFC + tXP, the rule used by
    # the parent class, would give 270ns with the values above - confirm
    # which value is intended.
    tXS = '65ns'