diff options
author | Radhika Jagtap <radhika.jagtap@arm.com> | 2016-11-25 16:16:17 +0000 |
---|---|---|
committer | Andreas Sandberg <andreas.sandberg@arm.com> | 2017-11-16 16:39:19 +0000 |
commit | b549de4707ef22398b179a3a7fd28a996ee18d36 (patch) | |
tree | 27fd9ab016256f669744a3d2653a2e0bb8656a49 | |
parent | 290a7e7c5c059c1309ca3d02ea3158e2ca9ac338 (diff) | |
download | gem5-b549de4707ef22398b179a3a7fd28a996ee18d36.tar.xz |
config: Add low power sweep for DRAM
This change adds a Python script to sweep a few parameters with a
goal to trigger transitions to the low power states of the DRAM
controller.
This script is largely based on the sweep.py but is helpful to study
the impact of inter-transaction delay on the behaviour of the DRAM
in addition to typical sweep params like stride size, bank
utilization and read percent. An idle period is added as the last
traffic generator state to target hitting self-refresh.
Change-Id: I34380afffbf2de9f4e997dfe9fff5e615e077524
Reviewed-by: Wendy Elsasser <wendy.elsasser@arm.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/5725
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
-rw-r--r-- | configs/dram/low_power_sweep.py | 251 |
1 files changed, 251 insertions, 0 deletions
diff --git a/configs/dram/low_power_sweep.py b/configs/dram/low_power_sweep.py new file mode 100644 index 000000000..610404332 --- /dev/null +++ b/configs/dram/low_power_sweep.py @@ -0,0 +1,251 @@ +# Copyright (c) 2014-2015, 2017 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Radhika Jagtap +# Andreas Hansson + +import argparse + +import m5 +from m5.objects import * +from m5.util import addToPath +from m5.stats import periodicStatDump + +addToPath('../common') +import MemConfig + +# This script aims at triggering low power state transitions in the DRAM +# controller. The traffic generator is used in DRAM mode and traffic +# states target a different levels of bank utilization and strides. +# At the end after sweeping through bank utilization and strides, we go +# through an idle state with no requests to enforce self-refresh. + +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + +# Use a single-channel DDR4-2400 in 16x4 configuration by default +parser.add_argument("--mem-type", default="DDR4_2400_16x4", + choices=MemConfig.mem_names(), + help = "type of memory to use") + +parser.add_argument("--mem-ranks", "-r", type=int, default=1, + help = "Number of ranks to iterate across") + +parser.add_argument("--page-policy", "-p", + choices=["close_adaptive", "open_adaptive"], + default="close_adaptive", help="controller page policy") + +parser.add_argument("--itt-list", "-t", default="1 20 100", + help="a list of multipliers for the max value of itt, " \ + "e.g. \"1 20 100\"") + +parser.add_argument("--rd-perc", type=int, default=100, + help = "Percentage of read commands") + +parser.add_argument("--addr-map", type=int, default=1, + help = "0: RoCoRaBaCh; 1: RoRaBaCoCh/RoRaBaChCo") + +parser.add_argument("--idle-end", type=int, default=50000000, + help = "time in ps of an idle period at the end ") + +args = parser.parse_args() + +# Start with the system itself, using a multi-layer 2.0 GHz +# crossbar, delivering 64 bytes / 3 cycles (one header cycle) +# which amounts to 42.7 GByte/s per layer and thus per port. +system = System(membus = IOXBar(width = 32)) +system.clk_domain = SrcClockDomain(clock = '2.0GHz', + voltage_domain = + VoltageDomain(voltage = '1V')) + +# We are fine with 256 MB memory for now. +mem_range = AddrRange('256MB') +# Start address is 0 +system.mem_ranges = [mem_range] + +# Do not worry about reserving space for the backing store +system.mmap_using_noreserve = True + +# Force a single channel to match the assumptions in the DRAM traffic +# generator +args.mem_channels = 1 +args.external_memory_system = 0 +args.tlm_memory = 0 +args.elastic_trace_en = 0 +MemConfig.config_mem(args, system) + +# Sanity check for memory controller class. +if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl): + fatal("This script assumes the memory is a DRAMCtrl subclass") + +# There is no point slowing things down by saving any data. +system.mem_ctrls[0].null = True + +# Set the address mapping based on input argument +# Default to RoRaBaCoCh +if args.addr_map == 0: + system.mem_ctrls[0].addr_mapping = "RoCoRaBaCh" +elif args.addr_map == 1: + system.mem_ctrls[0].addr_mapping = "RoRaBaCoCh" +else: + fatal("Did not specify a valid address map argument") + +system.mem_ctrls[0].page_policy = args.page_policy + +# We create a traffic generator state for each param combination we want to +# test. Each traffic generator state is specified in the config file and the +# generator remains in the state for specific period. This period is 0.25 ms. +# Stats are dumped and reset at the state transition. +period = 250000000 + +# We specify the states in a config file input to the traffic generator. +cfg_file_name = "configs/dram/lowp_sweep.cfg" +cfg_file = open(cfg_file_name, 'w') + +# Get the number of banks +nbr_banks = int(system.mem_ctrls[0].banks_per_rank.value) + +# determine the burst size in bytes +burst_size = int((system.mem_ctrls[0].devices_per_rank.value * + system.mem_ctrls[0].device_bus_width.value * + system.mem_ctrls[0].burst_length.value) / 8) + +# next, get the page size in bytes (the rowbuffer size is already in bytes) +page_size = system.mem_ctrls[0].devices_per_rank.value * \ + system.mem_ctrls[0].device_rowbuffer_size.value + +# Inter-request delay should be such that we can hit as many transitions +# to/from low power states as possible to. We provide a min and max itt to the +# traffic generator and it randomises in the range. The parameter is in +# seconds and we need it in ticks (ps). +itt_min = system.mem_ctrls[0].tBURST.value * 1000000000000 + +#The itt value when set to (tRAS + tRP + tCK) covers the case where +# a read command is delayed beyond the delay from ACT to PRE_PDN entry of the +# previous command. For write command followed by precharge, this delay +# between a write and power down entry will be tRCD + tCL + tWR + tRP + tCK. +# As we use this delay as a unit and create multiples of it as bigger delays +# for the sweep, this parameter works for reads, writes and mix of them. +pd_entry_time = (system.mem_ctrls[0].tRAS.value + + system.mem_ctrls[0].tRP.value + + system.mem_ctrls[0].tCK.value) * 1000000000000 + +# We sweep itt max using the multipliers specified by the user. +itt_max_str = args.itt_list.strip().split() +itt_max_multiples = map(lambda x : int(x), itt_max_str) +if len(itt_max_multiples) == 0: + fatal("String for itt-max-list detected empty\n") + +itt_max_values = map(lambda m : pd_entry_time * m, itt_max_multiples) + +# Generate request addresses in the entire range, assume we start at 0 +max_addr = mem_range.end + +# For max stride, use min of the page size and 512 bytes as that should be +# more than enough +max_stride = min(512, page_size) +mid_stride = 4 * burst_size +stride_values = [burst_size, mid_stride, max_stride] + +# be selective about bank utilization instead of going from 1 to the number of +# banks +bank_util_values = [1, int(nbr_banks/2), nbr_banks] + +# Next we create the config file, but first a comment +cfg_file.write("""# STATE state# period mode=DRAM +# read_percent start_addr end_addr req_size min_itt max_itt data_limit +# stride_size page_size #banks #banks_util addr_map #ranks\n""") + +nxt_state = 0 +for itt_max in itt_max_values: + for bank in bank_util_values: + for stride_size in stride_values: + cfg_file.write("STATE %d %d %s %d 0 %d %d " + "%d %d %d %d %d %d %d %d %d\n" % + (nxt_state, period, "DRAM", args.rd_perc, max_addr, + burst_size, itt_min, itt_max, 0, stride_size, + page_size, nbr_banks, bank, args.addr_map, + args.mem_ranks)) + nxt_state = nxt_state + 1 + +# State for idle period +idle_period = args.idle_end +cfg_file.write("STATE %d %d IDLE\n" % (nxt_state, idle_period)) + +# Init state is state 0 +cfg_file.write("INIT 0\n") + +# Go through the states one by one +for state in range(1, nxt_state + 1): + cfg_file.write("TRANSITION %d %d 1\n" % (state - 1, state)) + +# Transition from last state to itself to not break the probability math +cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state, nxt_state)) +cfg_file.close() + +# create a traffic generator, and point it to the file we just created +system.tgen = TrafficGen(config_file = cfg_file_name) + +# add a communication monitor +system.monitor = CommMonitor() + +# connect the traffic generator to the bus via a communication monitor +system.tgen.port = system.monitor.slave +system.monitor.master = system.membus.slave + +# connect the system port even if it is not used in this example +system.system_port = system.membus.slave + +# every period, dump and reset all stats +periodicStatDump(period) + +root = Root(full_system = False, system = system) +root.system.mem_mode = 'timing' + +m5.instantiate() + +# Simulate for exactly as long as it takes to go through all the states +# This is why sim exists. +m5.simulate(nxt_state * period + idle_period) +print "--- Done DRAM low power sweep ---" +print "Fixed params - " +print "\tburst: %d, banks: %d, max stride: %d, itt min: %s ns" % \ + (burst_size, nbr_banks, max_stride, itt_min) +print "Swept params - " +print "\titt max multiples input:", itt_max_multiples +print "\titt max values", itt_max_values +print "\tbank utilization values", bank_util_values +print "\tstride values:", stride_values +print "Traffic gen config file:", cfg_file_name |