# Copyright (c) 2012-2014 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall # not be construed as granting a license to any other intellectual # property including but not limited to intellectual property relating # to a hardware implementation of the functionality of the software # licensed hereunder. You may use the software subject to the license # terms below provided that you ensure that this notice is replicated # unmodified and in its entirety in all distributions of the software, # modified or unmodified, in source code or in binary form. # # Copyright (c) 2013 Amin Farmahini-Farahani # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer; # redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution; # neither the name of the copyright holders nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Authors: Andreas Hansson # Ani Udipi from m5.params import * from AbstractMemory import * # Enum for memory scheduling algorithms, currently First-Come # First-Served and a First-Row Hit then First-Come First-Served class MemSched(Enum): vals = ['fcfs', 'frfcfs'] # Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting # channel, rank, bank, row and column, respectively, and going from # MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are # suitable for an open-page policy, optimising for sequential accesses # hitting in the open row. For a closed-page policy, RoCoRaBaCh # maximises parallelism. class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh'] # Enum for the page policy, either open, open_adaptive, close, or # close_adaptive. class PageManage(Enum): vals = ['open', 'open_adaptive', 'close', 'close_adaptive'] # DRAMCtrl is a single-channel single-ported DRAM controller model # that aims to model the most important system-level performance # effects of a DRAM without getting into too much detail of the DRAM # itself. class DRAMCtrl(AbstractMemory): type = 'DRAMCtrl' cxx_header = "mem/dram_ctrl.hh" # single-ported on the system interface side, instantiate with a # bus in front of the controller for multiple ports port = SlavePort("Slave port") # the basic configuration of the controller architecture write_buffer_size = Param.Unsigned(64, "Number of write queue entries") read_buffer_size = Param.Unsigned(32, "Number of read queue entries") # threshold in percent for when to forcefully trigger writes and # start emptying the write buffer write_high_thresh_perc = Param.Percent(85, "Threshold to force writes") # threshold in percentage for when to start writes if the read # queue is empty write_low_thresh_perc = Param.Percent(50, "Threshold to start writes") # minimum write bursts to schedule before switching back to reads min_writes_per_switch = Param.Unsigned(16, "Minimum write bursts before " "switching to reads") # scheduler, address map and page policy mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy") addr_mapping = Param.AddrMap('RoRaBaChCo', "Address mapping policy") page_policy = Param.PageManage('open_adaptive', "Page management policy") # enforce a limit on the number of accesses per row max_accesses_per_row = Param.Unsigned(16, "Max accesses per row before " "closing"); # pipeline latency of the controller and PHY, split into a # frontend part and a backend part, with reads and writes serviced # by the queues only seeing the frontend contribution, and reads # serviced by the memory seeing the sum of the two static_frontend_latency = Param.Latency("10ns", "Static frontend latency") static_backend_latency = Param.Latency("10ns", "Static backend latency") # the physical organisation of the DRAM device_bus_width = Param.Unsigned("data bus width in bits for each DRAM "\ "device/chip") burst_length = Param.Unsigned("Burst lenght (BL) in beats") device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\ "device/chip") devices_per_rank = Param.Unsigned("Number of devices/chips per rank") ranks_per_channel = Param.Unsigned("Number of ranks per channel") banks_per_rank = Param.Unsigned("Number of banks per rank") # only used for the address mapping as the controller by # construction is a single channel and multiple controllers have # to be instantiated for a multi-channel configuration channels = Param.Unsigned(1, "Number of channels") # timing behaviour and constraints - all in nanoseconds # the base clock period of the DRAM tCK = Param.Latency("Clock period") # the amount of time in nanoseconds from issuing an activate command # to the data being available in the row buffer for a read/write tRCD = Param.Latency("RAS to CAS delay") # the time from issuing a read/write command to seeing the actual data tCL = Param.Latency("CAS latency") # minimum time between a precharge and subsequent activate tRP = Param.Latency("Row precharge time") # minimum time between an activate and a precharge to the same row tRAS = Param.Latency("ACT to PRE delay") # minimum time between a write data transfer and a precharge tWR = Param.Latency("Write recovery time") # minimum time between a read and precharge command tRTP = Param.Latency("Read to precharge") # time to complete a burst transfer, typically the burst length # divided by two due to the DDR bus, but by making it a parameter # it is easier to also evaluate SDR memories like WideIO. # This parameter has to account for burst length. # Read/Write requests with data size larger than one full burst are broken # down into multiple requests in the controller tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)") # time taken to complete one refresh cycle (N rows in all banks) tRFC = Param.Latency("Refresh cycle time") # refresh command interval, how often a "ref" command needs # to be sent. It is 7.8 us for a 64ms refresh requirement tREFI = Param.Latency("Refresh command interval") # write-to-read turn around penalty tWTR = Param.Latency("Write to read switching time") # read-to-write turn around penalty, bus turnaround delay tRTW = Param.Latency("Read to write switching time") # minimum row activate to row activate delay time tRRD = Param.Latency("ACT to ACT delay") # time window in which a maximum number of activates are allowed # to take place, set to 0 to disable tXAW = Param.Latency("X activation window") activation_limit = Param.Unsigned("Max number of activates in window") # Currently rolled into other params ###################################################################### # tRC - assumed to be tRAS + tRP # A single DDR3-1600 x64 channel (one command and address bus), with # timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in # an 8x8 configuration, amounting to 4 Gbyte of memory. class DDR3_1600_x64(DRAMCtrl): # 8x8 configuration, 8 devices each with an 8-bit interface device_bus_width = 8 # DDR3 is a BL8 device burst_length = 8 # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8) device_rowbuffer_size = '1kB' # 8x8 configuration, so 8 devices devices_per_rank = 8 # Use two ranks ranks_per_channel = 2 # DDR3 has 8 banks in all configurations banks_per_rank = 8 # 800 MHz tCK = '1.25ns' # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz tBURST = '5ns' # DDR3-1600 11-11-11 tRCD = '13.75ns' tCL = '13.75ns' tRP = '13.75ns' tRAS = '35ns' tRRD = '6ns' tXAW = '30ns' activation_limit = 4 tRFC = '260ns' tWR = '15ns' # Greater of 4 CK or 7.5 ns tWTR = '7.5ns' # Greater of 4 CK or 7.5 ns tRTP = '7.5ns' # Default read-to-write bus around to 2 CK, @800 MHz = 2.5 ns tRTW = '2.5ns' # <=85C, half for >85C tREFI = '7.8us' # A single DDR3-2133 x64 channel refining a selected subset of the # options for the DDR-1600 configuration, based on the same DDR3-1600 # 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept # consistent across the two configurations. class DDR3_2133_x64(DDR3_1600_x64): # 1066 MHz tCK = '0.938ns' # 8 beats across an x64 interface translates to 4 clocks @ 1066 MHz tBURST = '3.752ns' # DDR3-2133 14-14-14 tRCD = '13.09ns' tCL = '13.09ns' tRP = '13.09ns' tRAS = '33ns' tRRD = '5ns' tXAW = '25ns' # A single DDR4-2400 x64 channel (one command and address bus), with # timings based on a DDR4-2400 4 Gbit datasheet (Samsung K4A4G085WD) # in an 8x8 configuration, amounting to 4 Gbyte of memory. class DDR4_2400_x64(DRAMCtrl): # 8x8 configuration, 8 devices each with an 8-bit interface device_bus_width = 8 # DDR4 is a BL8 device burst_length = 8 # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8) device_rowbuffer_size = '1kB' # 8x8 configuration, so 8 devices devices_per_rank = 8 # Use a single rank ranks_per_channel = 1 # DDR4 has 16 banks (4 bank groups) in all # configurations. Currently we do not capture the additional # constraints incurred by the bank groups banks_per_rank = 16 # 1200 MHz tCK = '0.833ns' # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz tBURST = '3.333ns' # DDR4-2400 17-17-17 tRCD = '14.16ns' tCL = '14.16ns' tRP = '14.16ns' tRAS = '32ns' # Here using the average of RRD_S and RRD_L tRRD = '4.1ns' tXAW = '21ns' activation_limit = 4 tRFC = '260ns' tWR = '15ns' # Here using the average of WTR_S and WTR_L tWTR = '5ns' # Greater of 4 CK or 7.5 ns tRTP = '7.5ns' # Default read-to-write bus around to 2 CK, @1200 MHz = 1.666 ns tRTW = '1.666ns' # <=85C, half for >85C tREFI = '7.8us' # A single DDR3 x64 interface (one command and address bus), with # default timings based on DDR3-1333 4 Gbit parts in an 8x8 # configuration, which would amount to 4 GByte of memory. This # configuration is primarily for comparing with DRAMSim2, and all the # parameters except ranks_per_channel are based on the DRAMSim2 config # file DDR3_micron_32M_8B_x8_sg15.ini. Note that ranks_per_channel has # to be manually set, depending on size of the memory to be # simulated. By default DRAMSim2 has 2048MB of memory with a single # rank. Therefore for 4 GByte memory, set ranks_per_channel = 2 class DDR3_1333_x64_DRAMSim2(DRAMCtrl): # 8x8 configuration, 8 devices each with an 8-bit interface device_bus_width = 8 # DDR3 is a BL8 device burst_length = 8 # Each device has a page (row buffer) size of 1KB # (this depends on the memory density) device_rowbuffer_size = '1kB' # 8x8 configuration, so 8 devices devices_per_rank = 8 # Use two ranks ranks_per_channel = 2 # DDR3 has 8 banks in all configurations banks_per_rank = 8 # 666 MHs tCK = '1.5ns' tRCD = '15ns' tCL = '15ns' tRP = '15ns' tRAS = '36ns' tWR = '15ns' tRTP = '7.5ns' # 8 beats across an x64 interface translates to 4 clocks @ 666.66 MHz. # Note this is a BL8 DDR device. tBURST = '6ns' tRFC = '160ns' # DDR3, <=85C, half for >85C tREFI = '7.8us' # Greater of 4 CK or 7.5 ns, 4 CK @ 666.66 MHz = 6 ns tWTR = '7.5ns' # Default read-to-write bus around to 2 CK, @666.66 MHz = 3 ns tRTW = '3ns' tRRD = '6.0ns' tXAW = '30ns' activation_limit = 4 # A single LPDDR2-S4 x32 interface (one command/address bus), with # default timings based on a LPDDR2-1066 4 Gbit part in a 1x32 # configuration. class LPDDR2_S4_1066_x32(DRAMCtrl): # 1x32 configuration, 1 device with a 32-bit interface device_bus_width = 32 # LPDDR2_S4 is a BL4 and BL8 device burst_length = 8 # Each device has a page (row buffer) size of 1KB # (this depends on the memory density) device_rowbuffer_size = '1kB' # 1x32 configuration, so 1 device devices_per_rank = 1 # Use a single rank ranks_per_channel = 1 # LPDDR2-S4 has 8 banks in all configurations banks_per_rank = 8 # 533 MHz tCK = '1.876ns' # Fixed at 15 ns tRCD = '15ns' # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time tCL = '15ns' # Pre-charge one bank 15 ns (all banks 18 ns) tRP = '15ns' tRAS = '42ns' tWR = '15ns' # 6 CK read to precharge delay tRTP = '11.256ns' # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz. # Note this is a BL8 DDR device. # Requests larger than 32 bytes are broken down into multiple requests # in the controller tBURST = '7.5ns' # LPDDR2-S4, 4 Gbit tRFC = '130ns' tREFI = '3.9us' # Irrespective of speed grade, tWTR is 7.5 ns tWTR = '7.5ns' # Default read-to-write bus around to 2 CK, @533 MHz = 3.75 ns tRTW = '3.75ns' # Activate to activate irrespective of density and speed grade tRRD = '10.0ns' # Irrespective of density, tFAW is 50 ns tXAW = '50ns' activation_limit = 4 # A single WideIO x128 interface (one command and address bus), with # default timings based on an estimated WIO-200 8 Gbit part. class WideIO_200_x128(DRAMCtrl): # 1x128 configuration, 1 device with a 128-bit interface device_bus_width = 128 # This is a BL4 device burst_length = 4 # Each device has a page (row buffer) size of 4KB # (this depends on the memory density) device_rowbuffer_size = '4kB' # 1x128 configuration, so 1 device devices_per_rank = 1 # Use one rank for a one-high die stack ranks_per_channel = 1 # WideIO has 4 banks in all configurations banks_per_rank = 4 # 200 MHz tCK = '5ns' # WIO-200 tRCD = '18ns' tCL = '18ns' tRP = '18ns' tRAS = '42ns' tWR = '15ns' # Read to precharge is same as the burst tRTP = '20ns' # 4 beats across an x128 SDR interface translates to 4 clocks @ 200 MHz. # Note this is a BL4 SDR device. tBURST = '20ns' # WIO 8 Gb tRFC = '210ns' # WIO 8 Gb, <=85C, half for >85C tREFI = '3.9us' # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns tWTR = '15ns' # Default read-to-write bus around to 2 CK, @200 MHz = 10 ns tRTW = '10ns' # Activate to activate irrespective of density and speed grade tRRD = '10.0ns' # Two instead of four activation window tXAW = '50ns' activation_limit = 2 # A single LPDDR3 x32 interface (one command/address bus), with # default timings based on a LPDDR3-1600 4 Gbit part in a 1x32 # configuration class LPDDR3_1600_x32(DRAMCtrl): # 1x32 configuration, 1 device with a 32-bit interface device_bus_width = 32 # LPDDR3 is a BL8 device burst_length = 8 # Each device has a page (row buffer) size of 4KB device_rowbuffer_size = '4kB' # 1x32 configuration, so 1 device devices_per_rank = 1 # Use a single rank ranks_per_channel = 1 # LPDDR3 has 8 banks in all configurations banks_per_rank = 8 # 800 MHz tCK = '1.25ns' # Fixed at 15 ns tRCD = '15ns' # 12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time tCL = '15ns' tRAS = '42ns' tWR = '15ns' # Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns tRTP = '7.5ns' # Pre-charge one bank 15 ns (all banks 18 ns) tRP = '15ns' # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz. # Note this is a BL8 DDR device. # Requests larger than 32 bytes are broken down into multiple requests # in the controller tBURST = '5ns' # LPDDR3, 4 Gb tRFC = '130ns' tREFI = '3.9us' # Irrespective of speed grade, tWTR is 7.5 ns tWTR = '7.5ns' # Default read-to-write bus around to 2 CK, @800 MHz = 2.5 ns tRTW = '2.5ns' # Activate to activate irrespective of density and speed grade tRRD = '10.0ns' # Irrespective of size, tFAW is 50 ns tXAW = '50ns' activation_limit = 4