summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--configs/common/HMC.py390
-rw-r--r--configs/common/MemConfig.py12
-rw-r--r--configs/example/hmctest.py170
-rw-r--r--ext/drampower/README.md4
-rw-r--r--ext/drampower/src/CmdScheduler.cc212
-rw-r--r--ext/drampower/src/CmdScheduler.h80
-rw-r--r--ext/drampower/src/CommandAnalysis.cc290
-rw-r--r--ext/drampower/src/CommandAnalysis.h29
-rw-r--r--ext/drampower/src/MemArchitectureSpec.h20
-rw-r--r--ext/drampower/src/MemCommand.cc43
-rw-r--r--ext/drampower/src/MemCommand.h42
-rw-r--r--ext/drampower/src/MemTimingSpec.h62
-rw-r--r--ext/drampower/src/MemoryPowerModel.cc331
-rw-r--r--ext/drampower/src/MemoryPowerModel.h12
-rw-r--r--ext/drampower/src/MemorySpecification.h11
-rw-r--r--ext/drampower/src/TraceParser.cc21
-rw-r--r--ext/drampower/src/TraceParser.h1
-rw-r--r--ext/drampower/src/Utils.h4
-rw-r--r--ext/drampower/src/libdrampower/LibDRAMPower.cc9
-rw-r--r--ext/drampower/src/libdrampower/LibDRAMPower.h2
-rw-r--r--ext/drampower/test/libdrampowertest/lib_test.cc5
-rw-r--r--src/mem/SerialLink.py2
-rw-r--r--src/mem/dram_ctrl.hh3
-rw-r--r--src/mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.py34
-rw-r--r--src/mem/serial_link.cc15
-rw-r--r--src/mem/serial_link.hh3
-rw-r--r--tests/quick/se/70.tgen/traffic.cfg7
-rw-r--r--util/systemc/main.cc9
-rw-r--r--util/systemc/stats.cc86
-rw-r--r--util/systemc/stats.hh4
-rw-r--r--util/tlm/README23
-rw-r--r--util/tlm/main.cc8
-rw-r--r--util/tlm/tlm_elastic.py123
33 files changed, 1296 insertions, 771 deletions
diff --git a/configs/common/HMC.py b/configs/common/HMC.py
index 130729f88..fcff94cc7 100644
--- a/configs/common/HMC.py
+++ b/configs/common/HMC.py
@@ -37,6 +37,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Erfan Azarkhish
+# Abdul Mutaal Ahmad
# A Simplified model of a complete HMC device. Based on:
# [1] http://www.hybridmemorycube.org/specification-download/
@@ -48,6 +49,10 @@
# (G. Kim et. al)
# [5] Near Data Processing, Are we there yet? (M. Gokhale)
# http://www.cs.utah.edu/wondp/gokhale.pdf
+# [6] openHMC - A Configurable Open-Source Hybrid Memory Cube Controller
+# (J. Schmidt)
+# [7] Hybrid Memory Cube performance characterization on data-centric
+# workloads (M. Gokhale)
#
# This script builds a complete HMC device composed of vault controllers,
# serial links, the main internal crossbar, and an external hmc controller.
@@ -60,23 +65,62 @@
# This component is simply an instance of the NoncoherentXBar class, and its
# parameters are tuned to [2].
#
-# - SERIAL LINKS:
+# - SERIAL LINKS CONTROLLER:
# SerialLink is a simple variation of the Bridge class, with the ability to
-# account for the latency of packet serialization. We assume that the
-# serializer component at the transmitter side does not need to receive the
-# whole packet to start the serialization. But the deserializer waits for
-# the complete packet to check its integrity first.
+# account for the latency of packet serialization and controller latency. We
+# assume that the serializer component at the transmitter side does not need
+# to receive the whole packet to start the serialization. But the
+# deserializer waits for the complete packet to check its integrity first.
+#
# * Bandwidth of the serial links is not modeled in the SerialLink component
-# itself. Instead bandwidth/port of the HMCController has been adjusted to
-# reflect the bandwidth delivered by 1 serial link.
+# itself.
+#
+# * Latency of serial link controller is composed of SerDes latency + link
+# controller
#
-# - HMC CONTROLLER:
-# Contains a large buffer (modeled with Bridge) to hide the access latency
-# of the memory cube. Plus it simply forwards the packets to the serial
-# links in a round-robin fashion to balance load among them.
# * It is inferred from the standard [1] and the literature [3] that serial
# links share the same address range and packets can travel over any of
# them so a load distribution mechanism is required among them.
+#
+# -----------------------------------------
+# | Host/HMC Controller |
+# | ---------------------- |
+# | | Link Aggregator | opt |
+# | ---------------------- |
+# | ---------------------- |
+# | | Serial Link + Ser | * 4 |
+# | ---------------------- |
+# |---------------------------------------
+# -----------------------------------------
+# | Device
+# | ---------------------- |
+# | | Xbar | * 4 |
+# | ---------------------- |
+# | ---------------------- |
+# | | Vault Controller | * 16 |
+# | ---------------------- |
+# | ---------------------- |
+# | | Memory | |
+# | ---------------------- |
+# |---------------------------------------|
+#
+# In this version we have present 3 different HMC archiecture along with
+# alongwith their corresponding test script.
+#
+# same: It has 4 crossbars in HMC memory. All the crossbars are connected
+# to each other, providing complete memory range. This archicture also covers
+# the added latency for sending a request to non-local vault(bridge in b/t
+# crossbars). All the 4 serial links can access complete memory. So each
+# link can be connected to separate processor.
+#
+# distributed: It has 4 crossbars inside the HMC. Crossbars are not
+# connected.Through each crossbar only local vaults can be accessed. But to
+# support this architecture we need a crossbar between serial links and
+# processor.
+#
+# mixed: This is a hybrid architecture. It has 4 crossbars inside the HMC.
+# 2 Crossbars are connected to only local vaults. From other 2 crossbar, a
+# request can be forwarded to any other vault.
import optparse
@@ -107,131 +151,277 @@ class HMCSystem(SubSystem):
# FIFOs at the input and output of the inteconnect)
xbar_response_latency = Param.Cycles(2, "Response latency of the XBar")
- #*****************************SERIAL LINK PARAMETERS**********************
- # Number of serial links [1]
- num_serial_links = Param.Unsigned(4, "Number of serial links")
+ # number of cross which connects 16 Vaults to serial link[7]
+ number_mem_crossbar = Param.Unsigned(4, "Number of crossbar in HMC"
+ )
+
+ #*****************************SERIAL LINK PARAMETERS***********************
+ # Number of serial links controllers [1]
+ num_links_controllers = Param.Unsigned(4, "Number of serial links")
# Number of packets (not flits) to store at the request side of the serial
# link. This number should be adjusted to achive required bandwidth
- link_buffer_size_req = Param.Unsigned(16, "Number of packets to buffer "
+ link_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer "
"at the request side of the serial link")
# Number of packets (not flits) to store at the response side of the serial
# link. This number should be adjusted to achive required bandwidth
- link_buffer_size_rsp = Param.Unsigned(16, "Number of packets to buffer "
+ link_buffer_size_rsp = Param.Unsigned(10, "Number of packets to buffer "
"at the response side of the serial link")
# Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus
# the PCB trace latency (3ns Estimated based on [5])
link_latency = Param.Latency('4.6ns', "Latency of the serial links")
- # Header overhead of the serial links: Header size is 128bits in HMC [1],
- # and we have 16 lanes, so the overhead is 8 cycles
- link_overhead = Param.Cycles(8, "The number of cycles required to"
- " transmit the packet header over the serial link")
-
- # Clock frequency of the serial links [1]
+ # Clock frequency of the each serial link(SerDes) [1]
link_frequency = Param.Frequency('10GHz', "Clock Frequency of the serial"
"links")
- # Number of parallel lanes in each serial link [1]
- num_lanes_per_link = Param.Unsigned(16, "Number of lanes per each link")
+ # Clock frequency of serial link Controller[6]
+ # clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] /
+ # data_path_width * 10^6
+ # clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz
+ link_controller_frequency = Param.Frequency('625MHz',
+ "Clock Frequency of the link controller")
- # Number of serial links [1]
- num_serial_links = Param.Unsigned(4, "Number of serial links")
+ # Latency of the serial link controller to process the packets[1][6]
+ # (ClockDomain = 625 Mhz )
+ # used here for calculations only
+ link_ctrl_latency = Param.Cycles(4, "The number of cycles required for the"
+ "controller to process the packet")
- #*****************************HMC CONTROLLER PARAMETERS*******************
- # Number of packets (not flits) to store at the HMC controller. This
- # number should be high enough to be able to hide the high latency of HMC
- ctrl_buffer_size_req = Param.Unsigned(256, "Number of packets to buffer "
- "at the HMC controller (request side)")
+ # total_ctrl_latency = link_ctrl_latency + link_latency
+ # total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns
+ total_ctrl_latency = Param.Latency('11ns', "The latency experienced by"
+ "every packet regardless of size of packet")
- # Number of packets (not flits) to store at the response side of the HMC
- # controller.
- ctrl_buffer_size_rsp = Param.Unsigned(256, "Number of packets to buffer "
- "at the HMC controller (response side)")
+ # Number of parallel lanes in each serial link [1]
+ num_lanes_per_link = Param.Unsigned( 16, "Number of lanes per each link")
- # Latency of the HMC controller to process the packets
- # (ClockDomain = Host clock domain)
- ctrl_latency = Param.Cycles(4, "The number of cycles required for the "
- " controller to process the packet")
+ # Number of serial links [1]
+ num_serial_links = Param.Unsigned(4, "Number of serial links")
- # Wiring latency from the SoC crossbar to the HMC controller
- ctrl_static_latency = Param.Latency('500ps', "Static latency of the HMC"
- "controller")
+ # speed of each lane of serial link - SerDes serial interface 10 Gb/s
+ serial_link_speed = Param.UInt64(10, "Gbs/s speed of each lane of"
+ "serial link")
- #*****************************PERFORMANCE MONITORING**********************
+ #*****************************PERFORMANCE MONITORING************************
# The main monitor behind the HMC Controller
- enable_global_monitor = Param.Bool(True, "The main monitor behind the "
+ enable_global_monitor = Param.Bool(False, "The main monitor behind the "
"HMC Controller")
# The link performance monitors
- enable_link_monitor = Param.Bool(True, "The link monitors")
+ enable_link_monitor = Param.Bool(False, "The link monitors" )
+
+ # link aggregator enable - put a cross between buffers & links
+ enable_link_aggr = Param.Bool(False, "The crossbar between port and "
+ "Link Controller")
+
+ enable_buff_div = Param.Bool(True, "Memory Range of Buffer is"
+ "divided between total range")
+
+ #*****************************HMC ARCHITECTURE ************************
+ # Memory chunk for 16 vault - numbers of vault / number of crossbars
+ mem_chunk = Param.Unsigned(4, "Chunk of memory range for each cross bar "
+ "in arch 0")
+
+ # size of req buffer within crossbar, used for modelling extra latency
+ # when the reuqest go to non-local vault
+ xbar_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer "
+ "at the request side of the crossbar")
+
+ # size of response buffer within crossbar, used for modelling extra latency
+ # when the response received from non-local vault
+ xbar_buffer_size_resp = Param.Unsigned(10, "Number of packets to buffer "
+ "at the response side of the crossbar")
+
+# configure host system with Serial Links
+def config_host_hmc(options, system):
+
+ system.hmc_host=HMCSystem()
+
+ try:
+ system.hmc_host.enable_global_monitor = options.enable_global_monitor
+ except:
+ pass;
+
+ try:
+ system.hmc_host.enable_link_monitor = options.enable_link_monitor
+ except:
+ pass;
+
+ # Serial link Controller with 16 SerDes links at 10 Gbps
+ # with serial link ranges w.r.t to architecture
+ system.hmc_host.seriallink = [SerialLink(ranges = options.ser_ranges[i],
+ req_size=system.hmc_host.link_buffer_size_req,
+ resp_size=system.hmc_host.link_buffer_size_rsp,
+ num_lanes=system.hmc_host.num_lanes_per_link,
+ link_speed=system.hmc_host.serial_link_speed,
+ delay=system.hmc_host.total_ctrl_latency)
+ for i in xrange(system.hmc_host.num_serial_links)]
+
+ # enable global monitor
+ if system.hmc_host.enable_global_monitor:
+ system.hmc_host.lmonitor = [ CommMonitor()
+ for i in xrange(system.hmc_host.num_serial_links)]
+
+ # set the clock frequency for serial link
+ for i in xrange(system.hmc_host.num_serial_links):
+ system.hmc_host.seriallink[i].clk_domain = SrcClockDomain(clock=system.
+ hmc_host.link_controller_frequency, voltage_domain=
+ VoltageDomain(voltage = '1V'))
+
+ # Connect membus/traffic gen to Serial Link Controller for differrent HMC
+ # architectures
+ if options.arch == "distributed":
+ for i in xrange(system.hmc_host.num_links_controllers):
+ if system.hmc_host.enable_global_monitor:
+ system.membus.master = system.hmc_host.lmonitor[i].slave
+ system.hmc_host.lmonitor[i].master = \
+ system.hmc_host.seriallink[i].slave
+ else:
+ system.membus.master = system.hmc_host.seriallink[i].slave
+ if options.arch == "mixed":
+ if system.hmc_host.enable_global_monitor:
+ system.membus.master = system.hmc_host.lmonitor[0].slave
+ system.hmc_host.lmonitor[0].master = \
+ system.hmc_host.seriallink[0].slave
+
+ system.membus.master = system.hmc_host.lmonitor[1].slave
+ system.hmc_host.lmonitor[1].master = \
+ system.hmc_host.seriallink[1].slave
+
+ system.tgen[2].port = system.hmc_host.lmonitor[2].slave
+ system.hmc_host.lmonitor[2].master = \
+ system.hmc_host.seriallink[2].slave
+
+ system.tgen[3].port = system.hmc_host.lmonitor[3].slave
+ system.hmc_host.lmonitor[3].master = \
+ system.hmc_host.seriallink[3].slave
+ else:
+ system.membus.master = system.hmc_host.seriallink[0].slave
+ system.membus.master = system.hmc_host.seriallink[1].slave
+ system.tgen[2].port = system.hmc_host.seriallink[2].slave
+ system.tgen[3].port = system.hmc_host.seriallink[3].slave
+ if options.arch == "same" :
+ for i in xrange(system.hmc_host.num_links_controllers):
+ if system.hmc_host.enable_global_monitor:
+ system.tgen[i].port = system.hmc_host.lmonitor[i].slave
+ system.hmc_host.lmonitor[i].master = \
+ system.hmc_host.seriallink[i].slave
+ else:
+ system.tgen[i].port = system.hmc_host.seriallink[i].slave
+
+ return system
# Create an HMC device and attach it to the current system
-def config_hmc(options, system):
+def config_hmc(options, system, hmc_host):
- system.hmc = HMCSystem()
+ # Create HMC device
+ system.hmc_dev = HMCSystem()
- system.buffer = Bridge(ranges=system.mem_ranges,
- req_size=system.hmc.ctrl_buffer_size_req,
- resp_size=system.hmc.ctrl_buffer_size_rsp,
- delay=system.hmc.ctrl_static_latency)
+ # Global monitor
try:
- system.hmc.enable_global_monitor = options.enable_global_monitor
+ system.hmc_dev.enable_global_monitor = options.enable_global_monitor
except:
pass;
try:
- system.hmc.enable_link_monitor = options.enable_link_monitor
+ system.hmc_dev.enable_link_monitor = options.enable_link_monitor
except:
pass;
- system.membus.master = system.buffer.slave
-
- # The HMC controller (Clock domain is the same as the host)
- system.hmccontroller = HMCController(width=(system.hmc.num_lanes_per_link.
- value * system.hmc.num_serial_links/8),
- frontend_latency=system.hmc.ctrl_latency,
- forward_latency=system.hmc.link_overhead,
- response_latency=system.hmc.link_overhead)
-
- system.hmccontroller.clk_domain = SrcClockDomain(clock=system.hmc.
- link_frequency, voltage_domain = VoltageDomain(voltage = '1V'))
-
- # Serial Links
- system.hmc.seriallink =[ SerialLink(ranges = system.mem_ranges,
- req_size=system.hmc.link_buffer_size_req,
- resp_size=system.hmc.link_buffer_size_rsp,
- num_lanes=system.hmc.num_lanes_per_link,
- delay=system.hmc.link_latency)
- for i in xrange(system.hmc.num_serial_links)]
-
- if system.hmc.enable_link_monitor:
- system.hmc.lmonitor = [ CommMonitor()
- for i in xrange(system.hmc.num_serial_links)]
-
- # The HMC Crossbar located in its logic-base (LoB)
- system.hmc.xbar = NoncoherentXBar(width = system.hmc.xbar_width,
- frontend_latency=system.hmc.xbar_frontend_latency,
- forward_latency=system.hmc.xbar_forward_latency,
- response_latency=system.hmc.xbar_response_latency )
- system.hmc.xbar.clk_domain = SrcClockDomain(clock =
- system.hmc.xbar_frequency, voltage_domain =
- VoltageDomain(voltage = '1V'))
-
- if system.hmc.enable_global_monitor:
- system.gmonitor = CommMonitor()
- system.buffer.master = system.gmonitor.slave
- system.gmonitor.master = system.hmccontroller.slave
- else:
- system.hmccontroller.slave = system.buffer.master
-
- for i in xrange(system.hmc.num_serial_links):
- system.hmccontroller.master = system.hmc.seriallink[i].slave
- system.hmc.seriallink[i].clk_domain = system.hmccontroller.clk_domain;
- if system.hmc.enable_link_monitor:
- system.hmc.seriallink[i].master = system.hmc.lmonitor[i].slave
- system.hmc.lmonitor[i].master = system.hmc.xbar.slave
+
+ if system.hmc_dev.enable_link_monitor:
+ system.hmc_dev.lmonitor = [ CommMonitor()
+ for i in xrange(system.hmc_dev.num_links_controllers)]
+
+ # 4 HMC Crossbars located in its logic-base (LoB)
+ system.hmc_dev.xbar = [ NoncoherentXBar(width=system.hmc_dev.xbar_width,
+ frontend_latency=system.hmc_dev.xbar_frontend_latency,
+ forward_latency=system.hmc_dev.xbar_forward_latency,
+ response_latency=system.hmc_dev.xbar_response_latency )
+ for i in xrange(system.hmc_host.number_mem_crossbar)]
+
+ for i in xrange(system.hmc_dev.number_mem_crossbar):
+ system.hmc_dev.xbar[i].clk_domain = SrcClockDomain(
+ clock=system.hmc_dev.xbar_frequency,voltage_domain=
+ VoltageDomain(voltage='1V'))
+
+ # Attach 4 serial link to 4 crossbar/s
+ for i in xrange(system.hmc_dev.num_serial_links):
+ if system.hmc_dev.enable_link_monitor:
+ system.hmc_host.seriallink[i].master = \
+ system.hmc_dev.lmonitor[i].slave
+ system.hmc_dev.lmonitor[i].master = system.hmc_dev.xbar[i].slave
else:
- system.hmc.seriallink[i].master = system.hmc.xbar.slave
+ system.hmc_host.seriallink[i].master = system.hmc_dev.xbar[i].slave
+
+ # Connecting xbar with each other for request arriving at the wrong xbar,
+ # then it will be forward to correct xbar. Bridge is used to connect xbars
+ if options.arch == "same":
+ numx = len(system.hmc_dev.xbar)
+
+ # create a list of buffers
+ system.hmc_dev.buffers = [ Bridge(
+ req_size=system.hmc_dev.xbar_buffer_size_req,
+ resp_size=system.hmc_dev.xbar_buffer_size_resp)
+ for i in xrange(numx * (system.hmc_dev.mem_chunk - 1))]
+
+ # Buffer iterator
+ it = iter(range(len(system.hmc_dev.buffers)))
+
+ # necesarry to add system_port to one of the xbar
+ system.system_port = system.hmc_dev.xbar[3].slave
+
+ # iterate over all the crossbars and connect them as required
+ for i in range(numx):
+ for j in range(numx):
+ # connect xbar to all other xbars except itself
+ if i != j:
+ # get the next index of buffer
+ index = it.next()
+
+ # Change the default values for ranges of bridge
+ system.hmc_dev.buffers[index].ranges = system.mem_ranges[
+ j * int(system.hmc_dev.mem_chunk):
+ (j + 1) * int(system.hmc_dev.mem_chunk)]
+
+ # Connect the bridge between corssbars
+ system.hmc_dev.xbar[i].master = system.hmc_dev.buffers[
+ index].slave
+ system.hmc_dev.buffers[
+ index].master = system.hmc_dev.xbar[j].slave
+ else:
+ # Don't connect the xbar to itself
+ pass
+
+ # Two crossbars are connected to all other crossbars-Other 2 vault
+ # can only direct traffic to it local vaults
+ if options.arch == "mixed":
+
+ system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4])
+ system.hmc_dev.xbar[3].master = system.hmc_dev.buffer30.slave
+ system.hmc_dev.buffer30.master = system.hmc_dev.xbar[0].slave
+
+ system.hmc_dev.buffer31 = Bridge(ranges=system.mem_ranges[4:8])
+ system.hmc_dev.xbar[3].master = system.hmc_dev.buffer31.slave
+ system.hmc_dev.buffer31.master = system.hmc_dev.xbar[1].slave
+
+ system.hmc_dev.buffer32 = Bridge(ranges=system.mem_ranges[8:12])
+ system.hmc_dev.xbar[3].master = system.hmc_dev.buffer32.slave
+ system.hmc_dev.buffer32.master = system.hmc_dev.xbar[2].slave
+
+
+ system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4])
+ system.hmc_dev.xbar[2].master = system.hmc_dev.buffer20.slave
+ system.hmc_dev.buffer20.master = system.hmc_dev.xbar[0].slave
+
+ system.hmc_dev.buffer21 = Bridge(ranges=system.mem_ranges[4:8])
+ system.hmc_dev.xbar[2].master = system.hmc_dev.buffer21.slave
+ system.hmc_dev.buffer21.master = system.hmc_dev.xbar[1].slave
+
+ system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16])
+ system.hmc_dev.xbar[2].master = system.hmc_dev.buffer23.slave
+ system.hmc_dev.buffer23.master = system.hmc_dev.xbar[3].slave
+
diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py
index 4685cd5d1..71e3bf460 100644
--- a/configs/common/MemConfig.py
+++ b/configs/common/MemConfig.py
@@ -153,9 +153,10 @@ def config_mem(options, system):
"""
if ( options.mem_type == "HMC_2500_x32"):
- HMC.config_hmc(options, system)
- subsystem = system.hmc
- xbar = system.hmc.xbar
+ HMChost = HMC.config_host_hmc(options, system)
+ HMC.config_hmc(options, system, HMChost.hmc_host)
+ subsystem = system.hmc_dev
+ xbar = system.hmc_dev.xbar
else:
subsystem = system
xbar = system.membus
@@ -222,4 +223,7 @@ def config_mem(options, system):
# Connect the controllers to the membus
for i in xrange(len(subsystem.mem_ctrls)):
- subsystem.mem_ctrls[i].port = xbar.master
+ if (options.mem_type == "HMC_2500_x32"):
+ subsystem.mem_ctrls[i].port = xbar[i/4].master
+ else:
+ subsystem.mem_ctrls[i].port = xbar.master
diff --git a/configs/example/hmctest.py b/configs/example/hmctest.py
new file mode 100644
index 000000000..bd6ca24d1
--- /dev/null
+++ b/configs/example/hmctest.py
@@ -0,0 +1,170 @@
+import optparse
+import sys
+import subprocess
+
+import m5
+from m5.objects import *
+from m5.util import addToPath
+
+addToPath('../common')
+import MemConfig
+import HMC
+
+parser = optparse.OptionParser()
+
+# Use a HMC_2500_x32 by default
+parser.add_option("--mem-type", type = "choice", default = "HMC_2500_x32",
+ choices = MemConfig.mem_names(),
+ help = "type of memory to use")
+
+parser.add_option("--ranks", "-r", type = "int", default = 1,
+ help = "Number of ranks to iterate across")
+
+parser.add_option("--rd_perc", type ="int", default=100,
+ help = "Percentage of read commands")
+
+parser.add_option("--mode", type ="choice", default ="DRAM",
+ choices = ["DRAM", "DRAM_ROTATE", "RANDOM"],
+ help = "DRAM: Random traffic; \
+ DRAM_ROTATE: Traffic rotating across banks and ranks"
+ )
+
+parser.add_option("--addr_map", type ="int", default = 1,
+ help = "0: RoCoRaBaCh; 1: RoRaBaCoCh/RoRaBaChCo")
+
+parser.add_option("--arch", type = "choice", default = "distributed",
+ choices = ["same", "distributed", "mixed"],
+ help = "same: HMC-4 links with same range\
+ distributed: HMC-4 links with distributed range\
+ mixed: mixed with same & distributed range")
+
+parser.add_option("--linkaggr", type = "int", default = 0,
+ help = "1: enable link crossbar, 0: disable link crossbar")
+
+parser.add_option("--num_cross", type = "int", default = 4,
+ help = "1: number of crossbar in HMC=1;\
+ 4: number of crossbar = 4")
+
+parser.add_option("--tlm-memory", type = "string",
+ help="use external port for SystemC TLM cosimulation")
+
+parser.add_option("--elastic-trace-en", action ="store_true",
+ help = """Enable capture of data dependency and instruction
+ fetch traces using elastic trace probe.""")
+
+(options, args) = parser.parse_args()
+
+if args:
+ print "Error: script doesn't take any positional arguments"
+ sys.exit(1)
+
+system = System()
+system.clk_domain = SrcClockDomain(clock='100GHz',
+ voltage_domain=
+ VoltageDomain(voltage = '1V'))
+# Create additional crossbar for arch1
+if options.arch == "distributed" or options.arch == "mixed" :
+ system.membus = NoncoherentXBar( width=8 )
+ system.membus.badaddr_responder = BadAddr()
+ system.membus.default = Self.badaddr_responder.pio
+ system.membus.width = 8
+ system.membus.frontend_latency = 3
+ system.membus.forward_latency = 4
+ system.membus.response_latency = 2
+
+ system.membus.clk_domain = SrcClockDomain(clock='100GHz', voltage_domain=
+ VoltageDomain(voltage = '1V'))
+
+# we are considering 4GB HMC device with following parameters
+# hmc_device_size = '4GB'
+# hmc_num_vaults = 16
+# hmc_vault_size = '256MB'
+# hmc_stack_size = 8
+# hmc_bank_in_stack = 2
+# hmc_bank_size = '16MB'
+# hmc_bank_in_vault = 16
+
+# determine the burst length in bytes
+burst_size = 256
+num_serial_links = 4
+num_vault_ctrl = 16
+options.mem_channels = 1
+options.external_memory_system = 0
+options.mem_ranks=1
+stride_size = burst_size
+system.cache_line_size = burst_size
+
+# Enable performance monitoring
+options.enable_global_monitor = True
+options.enable_link_monitor = False
+
+# Bytes used for calculations
+oneGBytes = 1024 * 1024 * 1024
+oneMBytes = 1024 * 1024
+
+# Memory ranges of 16 vault controller - Total_HMC_size / 16
+mem_range_vault = [ AddrRange(i * 256 * oneMBytes, ((i + 1) * 256 * oneMBytes)
+ - 1)
+ for i in range(num_vault_ctrl)]
+
+# Memmory ranges of serial link for arch-0
+# Same as the ranges of vault controllers - 4 vault - to - 1 serial link
+if options.arch == "same":
+ ser_range = [ AddrRange(0, (4 * oneGBytes) - 1)
+ for i in range(num_serial_links)]
+ options.ser_ranges = ser_range
+
+# Memmory ranges of serial link for arch-1
+# Distributed range accross links
+if options.arch == "distributed":
+ ser_range = [ AddrRange(i * oneGBytes, ((i + 1) * oneGBytes) - 1)
+ for i in range(num_serial_links)]
+ options.ser_ranges = ser_range
+
+# Memmory ranges of serial link for arch-2
+# "Mixed" address distribution over links
+if options.arch == "mixed":
+ ser_range0 = AddrRange(0 , (1 * oneGBytes) - 1)
+ ser_range1 = AddrRange(1 * oneGBytes , (2 * oneGBytes) - 1)
+ ser_range2 = AddrRange(0 , (4 * oneGBytes) - 1)
+ ser_range3 = AddrRange(0 , (4 * oneGBytes) - 1)
+ options.ser_ranges = [ser_range0, ser_range1, ser_range2, ser_range3]
+
+# Assign ranges of vault controller to system ranges
+system.mem_ranges = mem_range_vault
+
+# open traffic generator
+cfg_file_name = "./tests/quick/se/70.tgen/traffic.cfg"
+cfg_file = open(cfg_file_name, 'r')
+
+# number of traffic generator
+np = 4
+# create a traffic generator, and point it to the file we just created
+system.tgen = [ TrafficGen(config_file = cfg_file_name) for i in xrange(np)]
+
+# Config memory system with given HMC arch
+MemConfig.config_mem(options, system)
+
+if options.arch == "distributed":
+ for i in xrange(np):
+ system.tgen[i].port = system.membus.slave
+ # connect the system port even if it is not used in this example
+ system.system_port = system.membus.slave
+
+if options.arch == "mixed":
+ for i in xrange(int(np/2)):
+ system.tgen[i].port = system.membus.slave
+ # connect the system port even if it is not used in this example
+ system.system_port = system.membus.slave
+
+
+# run Forrest, run!
+root = Root(full_system = False, system = system)
+root.system.mem_mode = 'timing'
+
+m5.instantiate()
+m5.simulate(10000000000)
+
+m5.stats.dump()
+
+print "Done!"
diff --git a/ext/drampower/README.md b/ext/drampower/README.md
index a43298b01..5d6eb6e82 100644
--- a/ext/drampower/README.md
+++ b/ext/drampower/README.md
@@ -252,8 +252,8 @@ The tool is based on the DRAM power model developed jointly by the Computer Engi
**To cite the DRAMPower Tool:**
```
-[1] "DRAMPower: Open-source DRAM power & energy estimation tool"
-Karthik Chandrasekar, Christian Weis, Yonghui Li, Benny Akesson, Norbert Wehn, and Kees Goossens
+[1] DRAMPower: Open-source DRAM Power & Energy Estimation Tool
+Karthik Chandrasekar, Christian Weis, Yonghui Li, Sven Goossens, Matthias Jung, Omar Naji, Benny Akesson, Norbert Wehn, and Kees Goossens
URL: http://www.drampower.info
```
diff --git a/ext/drampower/src/CmdScheduler.cc b/ext/drampower/src/CmdScheduler.cc
index bffc5d3bb..a4619b94e 100644
--- a/ext/drampower/src/CmdScheduler.cc
+++ b/ext/drampower/src/CmdScheduler.cc
@@ -31,7 +31,7 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * Authors: Karthik Chandrasekar
+ * Authors: Karthik Chandrasekar, Yonghui Li, Sven Goossens
*
*/
#include "CmdScheduler.h"
@@ -42,17 +42,20 @@
#include <algorithm> // For max
+#define MILLION 1000000
+
+
using namespace std;
using namespace Data;
// Read the traces and get the transaction. Each transaction is executed by
// scheduling a number of commands to the memory. Hence, the transactions are
// translated into a sequence of commands which will be used for power analysis.
-void cmdScheduler::transTranslation(MemorySpecification memSpec,
+void cmdScheduler::transTranslation(const MemorySpecification& memSpec,
ifstream& trans_trace, int grouping, int interleaving, int burst, int powerdown)
{
commands.open("commands.trace", ifstream::out);
- MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
+ const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
nBanks = memArchSpec.nbrOfBanks;
nColumns = memArchSpec.nbrOfColumns;
burstLength = memArchSpec.burstLength;
@@ -77,13 +80,14 @@ void cmdScheduler::transTranslation(MemorySpecification memSpec,
} // cmdScheduler::transTranslation
// initialize the variables and vectors for starting command scheduling.
-void cmdScheduler::schedulingInitialization(MemorySpecification memSpec)
+void cmdScheduler::schedulingInitialization(const MemorySpecification& memSpec)
{
- MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
+ const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
- ACT.resize(2 * memSpec.memArchSpec.nbrOfBanks);
- RDWR.resize(2 * memSpec.memArchSpec.nbrOfBanks);
- PRE.resize(memSpec.memArchSpec.nbrOfBanks);
+ const size_t numBanks = static_cast<size_t>(memSpec.memArchSpec.nbrOfBanks);
+ ACT.resize(2 * numBanks);
+ RDWR.resize(2 * numBanks);
+ PRE.resize(numBanks);
bankaccess = memSpec.memArchSpec.nbrOfBanks;
if (!ACT.empty()) {
ACT.erase(ACT.begin(), ACT.end());
@@ -96,14 +100,15 @@ void cmdScheduler::schedulingInitialization(MemorySpecification memSpec)
}
///////////////initialization//////////////
- for (unsigned i = 0; i < memSpec.memArchSpec.nbrOfBanks; i++) {
+ for (int64_t i = 0; i < memSpec.memArchSpec.nbrOfBanks; i++) {
cmd.Type = PRECHARGE;
- cmd.bank = i;
+ cmd.bank = static_cast<unsigned>(i);
cmd.name = "PRE";
- if (memSpec.id == "WIDEIO_SDR")
- cmd.time = 1 - static_cast<double>(memSpec.memTimingSpec.TAW);
- else
- cmd.time = 1 - static_cast<double>(memSpec.memTimingSpec.FAW);
+ if (memSpec.id == "WIDEIO_SDR") {
+ cmd.time = 1 - memSpec.memTimingSpec.TAW;
+ } else {
+ cmd.time = 1 - memSpec.memTimingSpec.FAW;
+ }
PRE.push_back(cmd);
@@ -114,7 +119,7 @@ void cmdScheduler::schedulingInitialization(MemorySpecification memSpec)
cmd.Type = WRITE;
cmd.name = "WRITE";
cmd.time = -1;
- RDWR[i].push_back(cmd);
+ RDWR[static_cast<size_t>(i)].push_back(cmd);
}
tREF = memTimingSpec.REFI;
transFinish.time = 0;
@@ -130,14 +135,14 @@ void cmdScheduler::schedulingInitialization(MemorySpecification memSpec)
// transactions are generated according to the information read from the traces.
// Then the command scheduling function is triggered to generate commands and
// schedule them to the memory according to the timing constraints.
-void cmdScheduler::getTrans(std::ifstream& trans_trace, MemorySpecification memSpec)
+void cmdScheduler::getTrans(std::ifstream& trans_trace, const MemorySpecification& memSpec)
{
std::string line;
transTime = 0;
- unsigned newtranstime;
- unsigned transAddr;
- unsigned transType = 1;
+ uint64_t newtranstime;
+ uint64_t transAddr;
+ int64_t transType = 1;
trans TransItem;
if (!transTrace.empty()) {
@@ -147,12 +152,12 @@ void cmdScheduler::getTrans(std::ifstream& trans_trace, MemorySpecification memS
while (getline(trans_trace, line)) {
istringstream linestream(line);
string item;
- unsigned itemnum = 0;
+ uint64_t itemnum = 0;
while (getline(linestream, item, ',')) {
if (itemnum == 0) {
stringstream timestamp(item);
timestamp >> newtranstime;
- transTime = transTime + newtranstime;
+ transTime = transTime + static_cast<int64_t>(newtranstime);
} else if (itemnum == 1) {
if (item == "write" || item == "WRITE") {
transType = WRITE;
@@ -191,33 +196,35 @@ void cmdScheduler::getTrans(std::ifstream& trans_trace, MemorySpecification memS
// be scheduled until all the commands for the current one are scheduled.
// After the scheduling, a sequence of commands are obtained and they are written
// into commands.txt which will be used for power analysis.
-void cmdScheduler::analyticalScheduling(MemorySpecification memSpec)
+void cmdScheduler::analyticalScheduling(const MemorySpecification& memSpec)
{
- int Bs = -1;
- int transType = -1;
- double timer = 0;
- int bankGroupPointer = 0;
- int bankGroupAddr = 0;
+ int64_t transType = -1;
+ int64_t timer = 0;
+ uint64_t bankGroupPointer = 0;
+ uint64_t bankGroupAddr = 0;
bool collisionFound;
physicalAddr PhysicalAddress;
bool bankGroupSwitch = false;
- std::vector<unsigned> bankPointer(nbrOfBankGroups, 0);
- std::vector<int> bankAccessNum(nBanks, -1);
- std::vector<bool> ACTSchedule(nBanks, false);
- int bankAddr = -1;
- double endTime = 0;
- double tComing_REF = 0;
+ std::vector<uint64_t> bankPointer(static_cast<size_t>(nbrOfBankGroups), 0);
+ std::vector<int64_t> bankAccessNum(static_cast<size_t>(nBanks), -1);
+ std::vector<bool> ACTSchedule(static_cast<size_t>(nBanks), false);
+ uint64_t bankAddr = 0;
+ int64_t endTime = 0;
+ int64_t tComing_REF = 0;
Inselfrefresh = 0;
- MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
+ const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
- for (unsigned t = 0; t < transTrace.size(); t++) {
+ for (uint64_t t = 0; t < transTrace.size(); t++) {
cmdScheduling.erase(cmdScheduling.begin(), cmdScheduling.end());
- for (unsigned i = 0; i < nBanks; i++) {
- ACTSchedule[i] = false;
- bankAccessNum[i] = -1;
+ for (auto a : ACTSchedule) {
+ a = false;
+ }
+
+ for (auto& b : bankAccessNum) {
+ b = -1;
}
timingsGet = false;
@@ -225,13 +232,13 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec)
PhysicalAddress = memoryMap(transTrace[t], memSpec);
- for (unsigned i = 0; i < nbrOfBankGroups; i++) {
- bankPointer[i] = PhysicalAddress.bankAddr; // the bank pointer per group.
+ for (auto& b : bankPointer) {
+ b = PhysicalAddress.bankAddr; // the bank pointer per group.
}
bankGroupPointer = PhysicalAddress.bankGroupAddr;
- endTime = max(transFinish.time, PRE[transFinish.bank].time +
- static_cast<int>(memTimingSpec.RP));
+ endTime = max(transFinish.time, PRE[static_cast<size_t>(transFinish.bank)].time +
+ static_cast<int>(memTimingSpec.RP));
// Before starting the scheduling for the next transaction, it has to
// check whether it is necessary for implementing power down.
@@ -244,14 +251,12 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec)
///////////////Scheduling Refresh////////////////////////
if (((transFinish.time >= tREF) || (timer >= tREF))) {
- for (double i = 0; i <= ((timer - tComing_REF) > 0 ? (timer - tComing_REF) /
+ for (int64_t i = 0; i <= ((timer - tComing_REF) > 0 ? (timer - tComing_REF) /
memTimingSpec.REFI : 0); i++) {
cmd.bank = 0;
cmd.name = "REF";
- cmd.time = max(max(max(transFinish.time, PRE[transFinish.bank].time
- + static_cast<int>(memTimingSpec.RP)), tREF), startTime);
- if (((power_down == SELF_REFRESH) && !Inselfrefresh) ||
- (power_down != SELF_REFRESH)) {
+ cmd.time = max(max(max(transFinish.time, PRE[static_cast<size_t>(transFinish.bank)].time + memTimingSpec.RP), tREF), startTime);
+ if ((power_down == SELF_REFRESH && !Inselfrefresh) || power_down != SELF_REFRESH) {
cmdScheduling.push_back(cmd);
startTime = cmd.time + memTimingSpec.RFC;
}
@@ -262,7 +267,7 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec)
}
}
///////////////Execution Transactions///////////////////
- Bs = PhysicalAddress.bankAddr;
+ uint64_t Bs = PhysicalAddress.bankAddr;
transType = transTrace[t].type;
tRWTP = getRWTP(transType, memSpec);
@@ -280,9 +285,8 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec)
bankGroupSwitch = true;
}
// update to the current bank group address.
- bankGroupAddr = PhysicalAddress.bankGroupAddr + j;
- bankAddr = bankGroupAddr * nBanks / nbrOfBankGroups +
- bankPointer[bankGroupAddr];
+ bankGroupAddr = PhysicalAddress.bankGroupAddr + static_cast<uint64_t>(j);
+ bankAddr = bankGroupAddr * static_cast<uint64_t>(nBanks) / nbrOfBankGroups + bankPointer[bankGroupAddr];
} else {
bankAddr = Bs + i;
}
@@ -312,7 +316,7 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec)
static_cast<int>(memTimingSpec.TAW));
}
- if ((i == 0) && (j == 0)) {
+ if (i == 0 && j == 0) {
cmd.time = max(cmd.time, PreRDWR.time + 1);
cmd.time = max(cmd.time, timer);
cmd.time = max(startTime, cmd.time);
@@ -358,7 +362,7 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec)
}
for (int ACTBank = static_cast<int>(ACT.size() - 1);
ACTBank >= 0; ACTBank--) {
- if (ACT[ACTBank].bank == bankAddr) {
+ if (ACT[ACTBank].bank == static_cast<int64_t>(bankAddr)) {
cmd.time = max(PreRDWR.time + tSwitch_init, ACT.back().time
+ static_cast<int>(memTimingSpec.RCD));
break;
@@ -392,7 +396,7 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec)
PRE[bankAddr].name = "PRE";
for (int ACTBank = static_cast<int>(ACT.size() - 1);
ACTBank >= 0; ACTBank--) {
- if (ACT[ACTBank].bank == bankAddr) {
+ if (ACT[ACTBank].bank == static_cast<int64_t>(bankAddr)) {
PRE[bankAddr].time = max(ACT.back().time +
static_cast<int>(memTimingSpec.RAS),
PreRDWR.time + tRWTP);
@@ -419,7 +423,7 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec)
/////////////Update Vector Length/////////////////
// the vector length is reduced so that less memory is used for running
// this tool.
- if (ACT.size() >= memSpec.memArchSpec.nbrOfBanks) {
+ if (ACT.size() >= static_cast<size_t>(memSpec.memArchSpec.nbrOfBanks)) {
for (int m = 0; m < BI * BGI; m++) {
ACT.erase(ACT.begin());
RDWR[0].erase(RDWR[0].begin(), RDWR[0].end());
@@ -443,14 +447,14 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec)
// to add the power down/up during the command scheduling for transactions.
// It is called when the command scheduling for a transaction is finished, and it
// is also called if there is a refresh.
-void cmdScheduler::pdScheduling(double endTime, double timer,
- MemorySpecification memSpec)
+void cmdScheduler::pdScheduling(int64_t endTime, int64_t timer,
+ const MemorySpecification& memSpec)
{
- double ZERO = 0;
- MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
+ int64_t ZERO = 0;
+ const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
endTime = max(endTime, startTime);
- double pdTime = max(ZERO, timer - endTime);
+ int64_t pdTime = max(ZERO, timer - endTime);
if ((timer > (endTime + memTimingSpec.CKE)) && (power_down == POWER_DOWN)) {
cmd.bank = 0;
@@ -490,11 +494,11 @@ void cmdScheduler::pdScheduling(double endTime, double timer,
// get the time when a precharge occurs after a read/write command is scheduled.
// In addition, it copes with different kind of memories.
-int cmdScheduler::getRWTP(int transType, MemorySpecification memSpec)
+int64_t cmdScheduler::getRWTP(int64_t transType, const MemorySpecification& memSpec)
{
- int tRWTP_init = 0;
- MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
- MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
+ int64_t tRWTP_init = 0;
+ const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
+ const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
if (transType == READ) {
switch (memSpec.memoryType) {
@@ -506,13 +510,13 @@ int cmdScheduler::getRWTP(int transType, MemorySpecification memSpec)
case MemoryType::LPDDR2:
case MemoryType::LPDDR3:
tRWTP_init = memArchSpec.burstLength / memArchSpec.dataRate +
- max(0, static_cast<int>(memTimingSpec.RTP - 2));
+ max(int64_t(0), memTimingSpec.RTP - 2);
break;
case MemoryType::DDR2:
tRWTP_init = memTimingSpec.AL + memArchSpec.burstLength /
memArchSpec.dataRate +
- max(static_cast<int>(memTimingSpec.RTP), 2) - 2;
+ max(memTimingSpec.RTP, int64_t(2)) - 2;
break;
case MemoryType::DDR3:
@@ -525,10 +529,10 @@ int cmdScheduler::getRWTP(int transType, MemorySpecification memSpec)
} else if (transType == WRITE) {
if (memSpec.memoryType == MemoryType::WIDEIO_SDR) {
tRWTP_init = memTimingSpec.WL + memArchSpec.burstLength /
- memArchSpec.dataRate - 1 + memSpec.memTimingSpec.WR;
+ memArchSpec.dataRate - 1 + memTimingSpec.WR;
} else {
tRWTP_init = memTimingSpec.WL + memArchSpec.burstLength /
- memArchSpec.dataRate + memSpec.memTimingSpec.WR;
+ memArchSpec.dataRate + memTimingSpec.WR;
}
if ((memSpec.memoryType == MemoryType::LPDDR2) ||
(memSpec.memoryType == MemoryType::LPDDR3)) {
@@ -543,11 +547,11 @@ int cmdScheduler::getRWTP(int transType, MemorySpecification memSpec)
// In particular, tSwitch_init is generally used to provide the timings for
// scheduling a read/write command after a read/write command which have been
// scheduled to any possible banks within any possible bank groups (DDR4).
-void cmdScheduler::getTimingConstraints(bool BGSwitch, MemorySpecification memSpec,
- int PreType, int CurrentType)
+void cmdScheduler::getTimingConstraints(bool BGSwitch, const MemorySpecification& memSpec,
+ int64_t PreType, int64_t CurrentType)
{
- MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
- MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
+ const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
+ const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
if (memSpec.memoryType != MemoryType::DDR4) {
tRRD_init = memTimingSpec.RRD;
@@ -586,7 +590,7 @@ void cmdScheduler::getTimingConstraints(bool BGSwitch, MemorySpecification memSp
if (PreType == CurrentType) {
tSwitch_init = tCCD_init;
timingsGet = true;
- } else if ((PreType == WRITE) && (CurrentType == READ)) {
+ } else if (PreType == WRITE && CurrentType == READ) {
tSwitch_init = memTimingSpec.WL + memArchSpec.burstLength /
memArchSpec.dataRate + tWTR_init;
}
@@ -601,59 +605,55 @@ void cmdScheduler::getTimingConstraints(bool BGSwitch, MemorySpecification memSp
// The logical address of each transaction is translated into a physical address
// which consists of bank group (for DDR4), bank, row and column addresses.
cmdScheduler::physicalAddr cmdScheduler::memoryMap(trans Trans,
- MemorySpecification memSpec)
+ const MemorySpecification& memSpec)
{
- int DecLogic;
+ int64_t DecLogic;
physicalAddr PhysicalAddr;
DecLogic = Trans.logicalAddress;
// row-bank-column-BI-BC-BGI-BL
- if ((BGI > 1) && (memSpec.memoryType == MemoryType::DDR4)) {
- unsigned colBits = static_cast<unsigned>(log2(nColumns));
- unsigned bankShift = static_cast<unsigned>(colBits + ((BI > 1) ? log2(BI) : 0)
- + ((BGI > 1) ? log2(BGI) : 0));
- unsigned bankMask = static_cast<unsigned>(nBanks / (BI * nbrOfBankGroups) - 1)
- << bankShift;
- unsigned bankAddr = (DecLogic & bankMask) >>
- static_cast<unsigned>(colBits + ((BGI > 1) ? log2(BGI) : 0));
+ if (BGI > 1 && memSpec.memoryType == MemoryType::DDR4) {
+ uint64_t colBits = uintLog2(nColumns);
+ uint64_t bankShift = colBits + ((BI > 1) ? uintLog2(BI) : 0) + ((BGI > 1) ? uintLog2(BGI) : 0);
+ uint64_t bankMask = (nBanks / (BI * nbrOfBankGroups) - 1) << bankShift;
+ uint64_t bankAddr = (DecLogic & bankMask) >> (colBits + ((BGI > 1) ? uintLog2(BGI) : 0));
PhysicalAddr.bankAddr = bankAddr;
- unsigned bankGroupShift = static_cast<unsigned>(log2(burstLength));
- unsigned bankGroupMask = (nbrOfBankGroups / BGI - 1) << bankGroupShift;
- unsigned bankGroupAddr = (DecLogic & bankGroupMask) >> bankGroupShift;
+ uint64_t bankGroupShift = uintLog2(burstLength);
+ uint64_t bankGroupMask = (nbrOfBankGroups / BGI - 1) << bankGroupShift;
+ uint64_t bankGroupAddr = (DecLogic & bankGroupMask) >> bankGroupShift;
PhysicalAddr.bankGroupAddr = bankGroupAddr;
- unsigned colShift = static_cast<unsigned>(log2(BC * burstLength) +
- ((BI > 1) ? log2(BI) : 0) + ((BGI > 1) ? log2(BGI) : 0));
- unsigned colMask = static_cast<unsigned>(nColumns / (BC * burstLength) - 1)
- << colShift;
- unsigned colAddr = (DecLogic & colMask) >>
- static_cast<unsigned>((colShift - log2(static_cast<unsigned>(BC) * burstLength)));
+ uint64_t colShift = uintLog2(BC * burstLength) +
+ ((BI > 1) ? uintLog2(BI) : 0) + ((BGI > 1) ? uintLog2(BGI) : 0);
+ uint64_t colMask = (nColumns / (BC * burstLength) - 1) << colShift;
+ uint64_t colAddr = (DecLogic & colMask) >> (colShift - uintLog2(static_cast<uint64_t>(BC) * burstLength));
PhysicalAddr.colAddr = colAddr;
} else {
- unsigned colBits = static_cast<unsigned>(log2(nColumns));
- unsigned bankShift = static_cast<unsigned>(colBits + ((BI > 1) ? log2(BI) : 0));
- unsigned bankMask = static_cast<unsigned>(nBanks / BI - 1) << bankShift;
- unsigned bankAddr = (DecLogic & bankMask) >> colBits;
+ uint64_t colBits = uintLog2(nColumns);
+ uint64_t bankShift = colBits + ((BI > 1) ? uintLog2(BI) : 0);
+ uint64_t bankMask = (nBanks / BI - 1) << bankShift;
+ uint64_t bankAddr = (DecLogic & bankMask) >> colBits;
PhysicalAddr.bankAddr = bankAddr;
- unsigned colShift = static_cast<unsigned>(log2(BC * burstLength) +
- ((BI > 1) ? log2(BI) : 0));
- unsigned colMask = static_cast<unsigned>(nColumns / (BC * burstLength) - 1)
- << colShift;
- unsigned colAddr = (DecLogic & colMask) >>
- static_cast<unsigned>((colShift - log2(static_cast<unsigned>(BC) * burstLength)));
+ uint64_t colShift = (uintLog2(BC * burstLength) + ((BI > 1) ? uintLog2(BI) : 0));
+ uint64_t colMask = (nColumns / (BC * burstLength) - 1) << colShift;
+ uint64_t colAddr = (DecLogic & colMask) >> (colShift - uintLog2(BC * burstLength));
PhysicalAddr.colAddr = colAddr;
PhysicalAddr.bankGroupAddr = 0;
}
- unsigned rowShift = static_cast<unsigned>(log2(nColumns * nBanks));
- unsigned rowMask = static_cast<unsigned>(memSpec.memArchSpec.nbrOfRows - 1)
- << rowShift;
- unsigned rowAddr = (DecLogic & rowMask) >> rowShift;
+ uint64_t rowShift = uintLog2(nColumns * nBanks);
+ uint64_t rowMask = (memSpec.memArchSpec.nbrOfRows - 1) << rowShift;
+ uint64_t rowAddr = (DecLogic & rowMask) >> rowShift;
PhysicalAddr.rowAddr = rowAddr;
return PhysicalAddr;
} // cmdScheduler::memoryMap
+
+uint64_t cmdScheduler::uintLog2(uint64_t in)
+{
+ return static_cast<uint64_t>(log2(in));
+}
\ No newline at end of file
diff --git a/ext/drampower/src/CmdScheduler.h b/ext/drampower/src/CmdScheduler.h
index 3c60ea886..58efd279b 100644
--- a/ext/drampower/src/CmdScheduler.h
+++ b/ext/drampower/src/CmdScheduler.h
@@ -59,9 +59,9 @@ class cmdScheduler {
// the format of a transaction.
class trans {
public:
- int type;
- double timeStamp;
- unsigned logicalAddress;
+ int64_t type;
+ int64_t timeStamp;
+ uint64_t logicalAddress;
};
std::vector<trans> transTrace; // to store the transactions.
@@ -69,18 +69,18 @@ class cmdScheduler {
// the format of physical address.
class physicalAddr {
public:
- unsigned rowAddr;
- unsigned bankAddr;
- unsigned bankGroupAddr;
- unsigned colAddr;
+ uint64_t rowAddr;
+ uint64_t bankAddr;
+ uint64_t bankGroupAddr;
+ uint64_t colAddr;
};
// the format of a command.
class commandItem {
public:
- int Type;
- int bank;
- double time;
+ int64_t Type;
+ int64_t bank;
+ int64_t time;
std::string name;
physicalAddr PhysicalAddr;
// sorting the commands according to their scheduling time.
@@ -107,11 +107,11 @@ class cmdScheduler {
std::vector<commandItem> cmdScheduling;
std::vector<commandItem> cmdList;
unsigned elements;
- int BI, BC, BGI;
+ int64_t BI, BC, BGI;
// the function used to translate a transaction into a sequence of
// commands which are scheduled to the memory.
- void transTranslation(Data::MemorySpecification memSpec,
+ void transTranslation(const MemorySpecification& memSpec,
std::ifstream& trans_trace,
int grouping,
int interleaving,
@@ -119,45 +119,47 @@ class cmdScheduler {
int powerdown);
// get the transactions by reading the traces.
void getTrans(std::ifstream& pwr_trace,
- MemorySpecification memSpec);
+ const MemorySpecification& memSpec);
// the initialization function for scheduling.
- void schedulingInitialization(MemorySpecification memSpec);
+ void schedulingInitialization(const MemorySpecification& memSpec);
// the function used to schedule commands according to the timing constraints.
- void analyticalScheduling(MemorySpecification memSpec);
+ void analyticalScheduling(const MemorySpecification& memSpec);
// translate the logical address into physical address.
physicalAddr memoryMap(trans Trans,
- MemorySpecification memSpec);
+ const MemorySpecification& memSpec);
// the power down and power up are scheduled by pdScheduling
- void pdScheduling(double endTime,
- double timer,
- MemorySpecification memSpec);
+ void pdScheduling(int64_t endTime,
+ int64_t timer,
+ const MemorySpecification& memSpec);
// get the timings for scheduling a precharge since a read or write command
// is scheduled.
- int getRWTP(int transType,
- MemorySpecification memSpec);
+ int64_t getRWTP(int64_t transType,
+ const MemorySpecification& memSpec);
// get different kind of timing constraints according to the used memory.
void getTimingConstraints(bool BGSwitch,
- MemorySpecification memSpec,
- int PreType,
- int CurrentType);
+ const MemorySpecification& memSpec,
+ int64_t PreType,
+ int64_t CurrentType);
- double transTime;
+ uint64_t uintLog2(uint64_t in);
+
+ int64_t transTime;
// the flag for power down.
- int power_down;
- int Inselfrefresh;
- int tRRD_init;
- int tCCD_init;
- int tWTR_init;
- double tREF;
- double tSwitch_init;
- double tRWTP;
- int bankaccess;
- unsigned nBanks;
- unsigned nColumns;
- unsigned burstLength;
- unsigned nbrOfBankGroups;
+ int64_t power_down;
+ int64_t Inselfrefresh;
+ int64_t tRRD_init;
+ int64_t tCCD_init;
+ int64_t tWTR_init;
+ int64_t tREF;
+ int64_t tSwitch_init;
+ int64_t tRWTP;
+ int64_t bankaccess;
+ int64_t nBanks;
+ int64_t nColumns;
+ int64_t burstLength;
+ int64_t nbrOfBankGroups;
bool timingsGet;
- double startTime;
+ int64_t startTime;
// the scheduling results for all the transactions are written into
// commands which will be used by the power analysis part.
diff --git a/ext/drampower/src/CommandAnalysis.cc b/ext/drampower/src/CommandAnalysis.cc
index 4dea5c101..e557c2920 100644
--- a/ext/drampower/src/CommandAnalysis.cc
+++ b/ext/drampower/src/CommandAnalysis.cc
@@ -45,13 +45,34 @@
using namespace Data;
using namespace std;
-CommandAnalysis::CommandAnalysis()
+bool commandSorter(const MemCommand& i, const MemCommand& j)
{
+ if (i.getTimeInt64() == j.getTimeInt64()) {
+ return i.getType() == MemCommand::PRE && j.getType() != MemCommand::PRE;
+ } else {
+ return i.getTimeInt64() < j.getTimeInt64();
+ }
}
-CommandAnalysis::CommandAnalysis(const int nbrofBanks)
+CommandAnalysis::CommandAnalysis(const int64_t nbrofBanks)
{
// Initializing all counters and variables
+ clearStats(0);
+ zero = 0;
+
+ bankstate.resize(static_cast<size_t>(nbrofBanks), 0);
+ last_states.resize(static_cast<size_t>(nbrofBanks));
+ mem_state = 0;
+ num_active_banks = 0;
+
+ cmd_list.clear();
+ cached_cmd.clear();
+ activation_cycle.resize(static_cast<size_t>(nbrofBanks), 0);
+}
+
+// function to clear counters
+void CommandAnalysis::clearStats(const int64_t timestamp)
+{
numberofacts = 0;
numberofpres = 0;
@@ -64,10 +85,6 @@ CommandAnalysis::CommandAnalysis(const int nbrofBanks)
s_pre_pdns = 0;
numberofsrefs = 0;
- pop = 0;
- init = 0;
- zero = 0;
-
actcycles = 0;
precycles = 0;
f_act_pdcycles = 0;
@@ -85,28 +102,29 @@ CommandAnalysis::CommandAnalysis(const int nbrofBanks)
idlecycles_act = 0;
idlecycles_pre = 0;
+ // reset count references to timestamp so that they are moved
+ // to start of next stats generation
+ first_act_cycle = timestamp;
+ last_pre_cycle = timestamp;
+ pdn_cycle = timestamp;
+ sref_cycle = timestamp;
+ end_act_op = timestamp;
+ end_read_op = timestamp;
+ end_write_op = timestamp;
+
latest_act_cycle = -1;
- latest_pre_cycle = -1;
latest_read_cycle = -1;
latest_write_cycle = -1;
- end_read_op = 0;
- end_write_op = 0;
- end_act_op = 0;
-
- first_act_cycle = 0;
- last_pre_cycle = 0;
- bankstate.resize(nbrofBanks, 0);
- last_states.resize(nbrofBanks);
- mem_state = 0;
-
- sref_cycle = 0;
- pdn_cycle = 0;
-
- cmd_list.clear();
- full_cmd_list.resize(1, MemCommand::PRE);
- cached_cmd.clear();
- activation_cycle.resize(nbrofBanks, 0);
+ if (timestamp == 0) {
+ // set to -1 at beginning of simulation
+ latest_pre_cycle = -1;
+ } else {
+ // NOTE: reference is adjusted by tRP (PRE delay) when updating counter
+ // could remove tRP to ensure counter starts at beginning of next block;
+ // currently simply setting to timestamp for simplicity
+ latest_pre_cycle = timestamp;
+ }
}
// function to clear all arrays
@@ -114,7 +132,6 @@ void CommandAnalysis::clear()
{
cached_cmd.clear();
cmd_list.clear();
- full_cmd_list.clear();
last_states.clear();
bankstate.clear();
}
@@ -125,132 +142,57 @@ void CommandAnalysis::clear()
// issued command timestamp, when the auto-precharge would kick in
void CommandAnalysis::getCommands(const Data::MemorySpecification& memSpec,
- const int nbrofBanks, std::vector<MemCommand>& list, bool lastupdate)
+ std::vector<MemCommand>& list, bool lastupdate)
{
- for (vector<MemCommand>::const_iterator i = list.begin(); i != list.end(); ++i) {
- const MemCommand& cmd = *i;
- cmd_list.push_back(cmd);
-
+ for (size_t i = 0; i < list.size(); ++i) {
+ MemCommand& cmd = list[i];
MemCommand::cmds cmdType = cmd.getType();
if (cmdType == MemCommand::ACT) {
activation_cycle[cmd.getBank()] = cmd.getTimeInt64();
} else if (cmdType == MemCommand::RDA || cmdType == MemCommand::WRA) {
// Remove auto-precharge flag from command
- cmd_list.back().setType(cmd.typeWithoutAutoPrechargeFlag());
+ cmd.setType(cmd.typeWithoutAutoPrechargeFlag());
// Add the auto precharge to the list of cached_cmds
int64_t preTime = max(cmd.getTimeInt64() + cmd.getPrechargeOffset(memSpec, cmdType),
activation_cycle[cmd.getBank()] + memSpec.memTimingSpec.RAS);
- cached_cmd.push_back(MemCommand(MemCommand::PRE, cmd.getBank(), static_cast<double>(preTime)));
+ list.push_back(MemCommand(MemCommand::PRE, cmd.getBank(), preTime));
}
}
- pop = 0;
- // Note: the extra pre-cmds at the end of the lists, and the cast to double
- // of the size vector is probably not desirable.
- cmd_list.push_back(MemCommand::PRE);
- cached_cmd.push_back(MemCommand::PRE);
- analyse_commands(nbrofBanks, memSpec, cmd_list.size()-1,
- cached_cmd.size()-1, lastupdate);
- cmd_list.clear();
- cached_cmd.clear();
-} // CommandAnalysis::getCommands
-
-// Checks the auto-precharge cached command list and inserts the explicit
-// precharges with the appropriate timestamp in the original command list
-// (by merging) based on their offset from the issuing command. Calls the
-// evaluate function to analyse this expanded list of commands.
+ sort(list.begin(), list.end(), commandSorter);
-void CommandAnalysis::analyse_commands(const int nbrofBanks,
- Data::MemorySpecification memSpec, int64_t nCommands, int64_t nCached, bool lastupdate)
-{
- full_cmd_list.resize(1, MemCommand::PRE);
- unsigned mCommands = 0;
- unsigned mCached = 0;
- for (unsigned i = 0; i < nCommands + nCached + 1; i++) {
- if (cached_cmd.size() > 1) {
- if ((cmd_list[mCommands].getTime() > 1) && (init == 0)) {
- full_cmd_list[i].setType(MemCommand::PREA);
- init = 1;
- pop = 1;
- } else {
- init = 1;
- if ((cached_cmd[mCached].getTime() > 0) && (cmd_list.
- at(mCommands).getTime() < cached_cmd[mCached].
- getTime()) && ((cmd_list[mCommands].getTime() > 0) ||
- ((cmd_list[mCommands].getTime() == 0) && (cmd_list[mCommands].
- getType() != MemCommand::PRE)))) {
- full_cmd_list[i] = cmd_list[mCommands];
- mCommands++;
- } else if ((cached_cmd[mCached].getTime() > 0) && (cmd_list[mCommands].
- getTime() >= cached_cmd[mCached].getTime())) {
- full_cmd_list[i] = cached_cmd[mCached];
- mCached++;
- } else if (cached_cmd[mCached].getTime() == 0) {
- if ((cmd_list[mCommands].getTime() > 0) || ((cmd_list[mCommands].
- getTime() == 0) && (cmd_list[mCommands].
- getType() != MemCommand::PRE))) {
- full_cmd_list[i] = cmd_list[mCommands];
- mCommands++;
- }
- } else if (cmd_list[mCommands].getTime() == 0) {
- full_cmd_list[i] = cached_cmd[mCached];
- mCached++;
- }
- }
- } else {
- if ((cmd_list[mCommands].getTime() > 1) && (init == 0)) {
- full_cmd_list[i].setType(MemCommand::PREA);
- init = 1;
- pop = 1;
- } else {
- init = 1;
- if ((cmd_list[mCommands].getTime() > 0) || ((cmd_list.
- at(mCommands).getTime() == 0) && (cmd_list[mCommands].
- getType() != MemCommand::PRE))) {
- full_cmd_list[i] = cmd_list[mCommands];
- mCommands++;
- }
- }
- }
- full_cmd_list.resize(full_cmd_list.size() + 1, MemCommand::PRE);
+ if (lastupdate && list.empty() == false) {
+ // Add cycles at the end of the list
+ int64_t t = timeToCompletion(memSpec, list.back().getType()) + list.back().getTimeInt64() - 1;
+ list.push_back(MemCommand(MemCommand::NOP, 0, t));
}
- full_cmd_list.pop_back();
- if (pop == 0) {
- full_cmd_list.pop_back();
- }
- if (lastupdate) {
- full_cmd_list.resize(full_cmd_list.size() + 1, MemCommand::NOP);
- full_cmd_list[full_cmd_list.size() - 1].setTime(full_cmd_list
- [full_cmd_list.size() - 2].getTime() + timeToCompletion(memSpec,
- full_cmd_list[full_cmd_list.size() - 2].getType()) - 1);
- }
+ evaluate(memSpec, list);
+} // CommandAnalysis::getCommands
- evaluate(memSpec, full_cmd_list, nbrofBanks);
-} // CommandAnalysis::analyse_commands
// To get the time of completion of the issued command
// Derived based on JEDEC specifications
-int CommandAnalysis::timeToCompletion(const MemorySpecification&
+int64_t CommandAnalysis::timeToCompletion(const MemorySpecification&
memSpec, MemCommand::cmds type)
{
- int offset = 0;
+ int64_t offset = 0;
const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
if (type == MemCommand::RD) {
- offset = static_cast<int>(memTimingSpec.RL +
+ offset = memTimingSpec.RL +
memTimingSpec.DQSCK + 1 + (memArchSpec.burstLength /
- memArchSpec.dataRate));
+ memArchSpec.dataRate);
} else if (type == MemCommand::WR) {
- offset = static_cast<int>(memTimingSpec.WL +
+ offset = memTimingSpec.WL +
(memArchSpec.burstLength / memArchSpec.dataRate) +
- memTimingSpec.WR);
+ memTimingSpec.WR;
} else if (type == MemCommand::ACT) {
- offset = static_cast<int>(memTimingSpec.RCD);
+ offset = memTimingSpec.RCD;
} else if ((type == MemCommand::PRE) || (type == MemCommand::PREA)) {
- offset = static_cast<int>(memTimingSpec.RP);
+ offset = memTimingSpec.RP;
}
return offset;
} // CommandAnalysis::timeToCompletion
@@ -258,38 +200,39 @@ int CommandAnalysis::timeToCompletion(const MemorySpecification&
// Used to analyse a given list of commands and identify command timings
// and memory state transitions
void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
- vector<MemCommand>& cmd_list, int nbrofBanks)
+ vector<MemCommand>& cmd_list)
{
// for each command identify timestamp, type and bank
- for (unsigned cmd_list_counter = 0; cmd_list_counter < cmd_list.size();
- cmd_list_counter++) {
+ for (auto cmd : cmd_list) {
// For command type
- int type = cmd_list[cmd_list_counter].getType();
+ int type = cmd.getType();
// For command bank
- int bank = cmd_list[cmd_list_counter].getBank();
+ int bank = static_cast<int>(cmd.getBank());
// Command Issue timestamp in clock cycles (cc)
- int64_t timestamp = cmd_list[cmd_list_counter].getTimeInt64();
+ int64_t timestamp = cmd.getTimeInt64();
if (type == MemCommand::ACT) {
+ printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank);
// If command is ACT - update number of acts, bank state of the
// target bank, first and latest activation cycle and the memory
// state. Update the number of precharged/idle-precharged cycles.
numberofacts++;
- if (bankstate[bank] == 1) {
+ if (bankstate[static_cast<size_t>(bank)] == 1) {
printWarning("Bank is already active!", type, timestamp, bank);
}
- bankstate[bank] = 1;
- if (mem_state == 0) {
+ bankstate[static_cast<size_t>(bank)] = 1;
+ if (num_active_banks == 0) {
first_act_cycle = timestamp;
precycles += max(zero, timestamp - last_pre_cycle);
idle_pre_update(memSpec, timestamp, latest_pre_cycle);
}
latest_act_cycle = timestamp;
- mem_state++;
+ num_active_banks++;
} else if (type == MemCommand::RD) {
+ printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank);
// If command is RD - update number of reads and read cycle. Check
// for active idle cycles (if any).
- if (bankstate[bank] == 0) {
+ if (bankstate[static_cast<size_t>(bank)] == 0) {
printWarning("Bank is not active!", type, timestamp, bank);
}
numberofreads++;
@@ -297,9 +240,10 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
latest_act_cycle, timestamp);
latest_read_cycle = timestamp;
} else if (type == MemCommand::WR) {
+ printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank);
// If command is WR - update number of writes and write cycle. Check
// for active idle cycles (if any).
- if (bankstate[bank] == 0) {
+ if (bankstate[static_cast<size_t>(bank)] == 0) {
printWarning("Bank is not active!", type, timestamp, bank);
}
numberofwrites++;
@@ -307,6 +251,7 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
latest_act_cycle, timestamp);
latest_write_cycle = timestamp;
} else if (type == MemCommand::REF) {
+ printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank);
// If command is REF - update number of refreshes, set bank state of
// all banks to ACT, set the last PRE cycles at RFC-RP cycles from
// timestamp, set the number of active cycles to RFC-RP and check
@@ -321,56 +266,54 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
memSpec.memTimingSpec.RP;
latest_pre_cycle = last_pre_cycle;
actcycles += memSpec.memTimingSpec.RFC - memSpec.memTimingSpec.RP;
- mem_state = 0;
- for (int j = 0; j < nbrofBanks; j++) {
- bankstate[j] = 0;
+ num_active_banks = 0;
+ for (auto& b : bankstate) {
+ b = 0;
}
} else if (type == MemCommand::PRE) {
+ printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank);
// If command is explicit PRE - update number of precharges, bank
// state of the target bank and last and latest precharge cycle.
// Calculate the number of active cycles if the memory was in the
// active state before, but there is a state transition to PRE now.
// If not, update the number of precharged cycles and idle cycles.
// Update memory state if needed.
- if (bankstate[bank] == 1) {
+ if (bankstate[static_cast<size_t>(bank)] == 1) {
numberofpres++;
}
- bankstate[bank] = 0;
+ bankstate[static_cast<size_t>(bank)] = 0;
- if (mem_state == 1) {
+ if (num_active_banks == 1) {
actcycles += max(zero, timestamp - first_act_cycle);
last_pre_cycle = timestamp;
idle_act_update(memSpec, latest_read_cycle, latest_write_cycle,
latest_act_cycle, timestamp);
- } else if (mem_state == 0) {
+ } else if (num_active_banks == 0) {
precycles += max(zero, timestamp - last_pre_cycle);
idle_pre_update(memSpec, timestamp, latest_pre_cycle);
last_pre_cycle = timestamp;
}
latest_pre_cycle = timestamp;
- if (mem_state > 0) {
- mem_state--;
+ if (num_active_banks > 0) {
+ num_active_banks--;
} else {
- mem_state = 0;
+ num_active_banks = 0;
}
} else if (type == MemCommand::PREA) {
+ printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank);
// If command is explicit PREA (precharge all banks) - update
// number of precharges by the number of banks, update the bank
// state of all banks to PRE and set the precharge cycle.
// Calculate the number of active cycles if the memory was in the
// active state before, but there is a state transition to PRE now.
// If not, update the number of precharged cycles and idle cycles.
- if (timestamp == 0) {
- numberofpres += 0;
- } else {
- numberofpres += mem_state;
- }
+ numberofpres += num_active_banks;
- if (mem_state > 0) {
+ if (num_active_banks > 0) {
actcycles += max(zero, timestamp - first_act_cycle);
idle_act_update(memSpec, latest_read_cycle, latest_write_cycle,
latest_act_cycle, timestamp);
- } else if (mem_state == 0) {
+ } else if (num_active_banks == 0) {
precycles += max(zero, timestamp - last_pre_cycle);
idle_pre_update(memSpec, timestamp, latest_pre_cycle);
}
@@ -378,10 +321,10 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
latest_pre_cycle = timestamp;
last_pre_cycle = timestamp;
- mem_state = 0;
+ num_active_banks = 0;
- for (int j = 0; j < nbrofBanks; j++) {
- bankstate[j] = 0;
+ for (auto& b : bankstate) {
+ b = 0;
}
} else if (type == MemCommand::PDN_F_ACT) {
// If command is fast-exit active power-down - update number of
@@ -391,9 +334,7 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
// after powering-up. Update active and active idle cycles.
printWarningIfNotActive("All banks are precharged! Incorrect use of Active Power-Down.", type, timestamp, bank);
f_act_pdns++;
- for (int j = 0; j < nbrofBanks; j++) {
- last_states[j] = bankstate[j];
- }
+ last_states = bankstate;
pdn_cycle = timestamp;
actcycles += max(zero, timestamp - first_act_cycle);
idle_act_update(memSpec, latest_read_cycle, latest_write_cycle,
@@ -407,9 +348,7 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
// after powering-up. Update active and active idle cycles.
printWarningIfNotActive("All banks are precharged! Incorrect use of Active Power-Down.", type, timestamp, bank);
s_act_pdns++;
- for (int j = 0; j < nbrofBanks; j++) {
- last_states[j] = bankstate[j];
- }
+ last_states = bankstate;
pdn_cycle = timestamp;
actcycles += max(zero, timestamp - first_act_cycle);
idle_act_update(memSpec, latest_read_cycle, latest_write_cycle,
@@ -461,14 +400,14 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
memSpec.memTimingSpec.XPDLL -
(2 * memSpec.memTimingSpec.RCD));
}
- } else if ((mem_state != CommandAnalysis::MS_PDN_S_ACT) || (mem_state !=
- CommandAnalysis::MS_PDN_F_ACT)) {
+ } else if (mem_state != CommandAnalysis::MS_PDN_S_ACT || mem_state != CommandAnalysis::MS_PDN_F_ACT) {
cerr << "Incorrect use of Active Power-Up!" << endl;
}
+ num_active_banks = 0;
mem_state = 0;
- for (int j = 0; j < nbrofBanks; j++) {
- bankstate[j] = last_states[j];
- mem_state += last_states[j];
+ bankstate = last_states;
+ for (auto& a : last_states) {
+ num_active_banks += static_cast<unsigned int>(a);
}
first_act_cycle = timestamp;
} else if (type == MemCommand::PUP_PRE) {
@@ -493,11 +432,11 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
memSpec.memTimingSpec.XPDLL - memSpec.memTimingSpec.RCD -
memSpec.memTimingSpec.RP);
}
- } else if ((mem_state != CommandAnalysis::MS_PDN_S_PRE) || (mem_state !=
- CommandAnalysis::MS_PDN_F_PRE)) {
+ } else if (mem_state != CommandAnalysis::MS_PDN_S_PRE || mem_state != CommandAnalysis::MS_PDN_F_PRE) {
cerr << "Incorrect use of Precharged Power-Up!" << endl;
}
mem_state = 0;
+ num_active_banks = 0;
last_pre_cycle = timestamp;
} else if (type == MemCommand::SREN) {
// If command is self-refresh - update number of self-refreshes,
@@ -583,14 +522,15 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
}
}
mem_state = 0;
- } else if ((type == MemCommand::END) || (type == MemCommand::NOP)) {
+ num_active_banks = 0;
+ } else if (type == MemCommand::END || type == MemCommand::NOP) {
// May be optionally used at the end of memory trace for better accuracy
// Update all counters based on completion of operations.
- if ((mem_state > 0) && (mem_state < 9)) {
+ if (num_active_banks > 0 && mem_state == 0) {
actcycles += max(zero, timestamp - first_act_cycle);
idle_act_update(memSpec, latest_read_cycle, latest_write_cycle,
latest_act_cycle, timestamp);
- } else if (mem_state == 0) {
+ } else if (num_active_banks == 0 && mem_state == 0) {
precycles += max(zero, timestamp - last_pre_cycle);
idle_pre_update(memSpec, timestamp, latest_pre_cycle);
} else if (mem_state == CommandAnalysis::MS_PDN_F_ACT) {
@@ -604,6 +544,9 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec,
} else if (mem_state == CommandAnalysis::MS_SREF) {
sref_cycles += max(zero, timestamp - sref_cycle);
}
+ } else {
+ printWarning("Unknown command given, exiting.", type, timestamp, bank);
+ exit(-1);
}
}
} // CommandAnalysis::evaluate
@@ -646,14 +589,21 @@ void CommandAnalysis::idle_pre_update(const MemorySpecification& memSpec,
void CommandAnalysis::printWarningIfActive(const string& warning, int type, int64_t timestamp, int bank)
{
- if (mem_state != 0) {
+ if (num_active_banks != 0) {
printWarning(warning, type, timestamp, bank);
}
}
void CommandAnalysis::printWarningIfNotActive(const string& warning, int type, int64_t timestamp, int bank)
{
- if (mem_state == 0) {
+ if (num_active_banks == 0) {
+ printWarning(warning, type, timestamp, bank);
+ }
+}
+
+void CommandAnalysis::printWarningIfPoweredDown(const string& warning, int type, int64_t timestamp, int bank)
+{
+ if (mem_state != 0) {
printWarning(warning, type, timestamp, bank);
}
}
diff --git a/ext/drampower/src/CommandAnalysis.h b/ext/drampower/src/CommandAnalysis.h
index b5c7ac778..15261fb2f 100644
--- a/ext/drampower/src/CommandAnalysis.h
+++ b/ext/drampower/src/CommandAnalysis.h
@@ -58,10 +58,8 @@ class CommandAnalysis {
MS_PDN_S_PRE = 13, MS_SREF = 14
};
- CommandAnalysis();
-
// Returns number of reads, writes, acts, pres and refs in the trace
- CommandAnalysis(const int nbrofBanks);
+ CommandAnalysis(const int64_t nbrofBanks);
// Number of activate commands
int64_t numberofacts;
@@ -117,29 +115,25 @@ class CommandAnalysis {
// Number of precharged auto-refresh cycles during self-refresh exit
int64_t spup_ref_pre_cycles;
+ // function for clearing counters
+ void clearStats(const int64_t timestamp);
+
// function for clearing arrays
void clear();
// To identify auto-precharges
void getCommands(const MemorySpecification& memSpec,
- const int
- nbrofBanks,
std::vector<MemCommand>& list,
bool lastupdate);
private:
- unsigned init;
int64_t zero;
- unsigned pop;
// Cached last read command from the file
std::vector<MemCommand> cached_cmd;
// Stores the memory commands for analysis
std::vector<MemCommand> cmd_list;
- // Stores all memory commands for analysis
- std::vector<MemCommand> full_cmd_list;
-
// To save states of the different banks, before entering active
// power-down mode (slow/fast-exit).
std::vector<int> last_states;
@@ -171,26 +165,20 @@ class CommandAnalysis {
// Memory State
unsigned mem_state;
+ unsigned num_active_banks;
// Clock cycle of first activate command when memory state changes to ACT
int64_t first_act_cycle;
// Clock cycle of last precharge command when memory state changes to PRE
int64_t last_pre_cycle;
- // To collect and analyse all commands including auto-precharges
- void analyse_commands(const int nbrofBanks,
- Data::MemorySpecification
- memSpec,
- int64_t nCommands,
- int64_t nCached,
- bool lastupdate);
+
// To perform timing analysis of a given set of commands and update command counters
void evaluate(const MemorySpecification& memSpec,
- std::vector<MemCommand>& cmd_list,
- int nbrofBanks);
+ std::vector<MemCommand>& cmd_list);
// To calculate time of completion of any issued command
- int timeToCompletion(const MemorySpecification& memSpec,
+ int64_t timeToCompletion(const MemorySpecification& memSpec,
MemCommand::cmds type);
// To update idle period information whenever active cycles may be idle
@@ -207,6 +195,7 @@ class CommandAnalysis {
void printWarningIfActive(const std::string& warning, int type, int64_t timestamp, int bank);
void printWarningIfNotActive(const std::string& warning, int type, int64_t timestamp, int bank);
+ void printWarningIfPoweredDown(const std::string& warning, int type, int64_t timestamp, int bank);
void printWarning(const std::string& warning, int type, int64_t timestamp, int bank);
};
}
diff --git a/ext/drampower/src/MemArchitectureSpec.h b/ext/drampower/src/MemArchitectureSpec.h
index ca79edc91..49eddc8ac 100644
--- a/ext/drampower/src/MemArchitectureSpec.h
+++ b/ext/drampower/src/MemArchitectureSpec.h
@@ -31,13 +31,15 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * Authors: Karthik Chandrasekar
+ * Authors: Karthik Chandrasekar, Sven Goossens
*
*/
#ifndef TOOLS_MEM_ARCHITECTURE_SPEC_H
#define TOOLS_MEM_ARCHITECTURE_SPEC_H
+#include <stdint.h>
+
#include "Parametrisable.h"
namespace Data {
@@ -46,14 +48,14 @@ class MemArchitectureSpec : public virtual Parametrisable {
MemArchitectureSpec();
void processParameters();
- unsigned int burstLength;
- unsigned nbrOfBanks;
- unsigned nbrOfRanks;
- unsigned dataRate;
- unsigned nbrOfColumns;
- unsigned nbrOfRows;
- unsigned width;
- unsigned nbrOfBankGroups;
+ int64_t burstLength;
+ int64_t nbrOfBanks;
+ int64_t nbrOfRanks;
+ int64_t dataRate;
+ int64_t nbrOfColumns;
+ int64_t nbrOfRows;
+ int64_t width;
+ int64_t nbrOfBankGroups;
bool dll;
bool twoVoltageDomains;
bool termination;
diff --git a/ext/drampower/src/MemCommand.cc b/ext/drampower/src/MemCommand.cc
index 156716c2f..5e1115e05 100644
--- a/ext/drampower/src/MemCommand.cc
+++ b/ext/drampower/src/MemCommand.cc
@@ -44,15 +44,9 @@
using namespace Data;
using namespace std;
-MemCommand::MemCommand() :
- type(MemCommand::PRE),
- bank(0),
- timestamp(0)
-{
-}
MemCommand::MemCommand(MemCommand::cmds type,
- unsigned bank, double timestamp) :
+ unsigned bank, int64_t timestamp) :
type(type),
bank(bank),
timestamp(timestamp)
@@ -80,35 +74,35 @@ unsigned MemCommand::getBank() const
}
// For auto-precharge with read or write - to calculate cycle of precharge
-int MemCommand::getPrechargeOffset(const MemorySpecification& memSpec,
+int64_t MemCommand::getPrechargeOffset(const MemorySpecification& memSpec,
MemCommand::cmds type) const
{
- int precharge_offset = 0;
+ int64_t precharge_offset = 0;
- int BL(static_cast<int>(memSpec.memArchSpec.burstLength));
- int RTP(static_cast<int>(memSpec.memTimingSpec.RTP));
- int dataRate(static_cast<int>(memSpec.memArchSpec.dataRate));
- int AL(static_cast<int>(memSpec.memTimingSpec.AL));
- int WL(static_cast<int>(memSpec.memTimingSpec.WL));
- int WR(static_cast<int>(memSpec.memTimingSpec.WR));
- int B = BL/dataRate;
+ int64_t BL = memSpec.memArchSpec.burstLength;
+ int64_t RTP = memSpec.memTimingSpec.RTP;
+ int64_t dataRate = memSpec.memArchSpec.dataRate;
+ int64_t AL = memSpec.memTimingSpec.AL;
+ int64_t WL = memSpec.memTimingSpec.WL;
+ int64_t WR = memSpec.memTimingSpec.WR;
+ int64_t B = BL/dataRate;
const MemoryType::MemoryType_t& memType = memSpec.memoryType;
// Read with auto-precharge
if (type == MemCommand::RDA) {
if (memType == MemoryType::DDR2) {
- precharge_offset = B + AL - 2 + max(RTP, 2);
+ precharge_offset = B + AL - 2 + max(RTP, int64_t(2));
} else if (memType == MemoryType::DDR3) {
- precharge_offset = AL + max(RTP, 4);
+ precharge_offset = AL + max(RTP, int64_t(4));
} else if (memType == MemoryType::DDR4) {
precharge_offset = AL + RTP;
} else if (memType == MemoryType::LPDDR) {
precharge_offset = B;
} else if (memType == MemoryType::LPDDR2) {
- precharge_offset = B + max(0, RTP - 2);
+ precharge_offset = B + max(int64_t(0), RTP - 2);
} else if (memType == MemoryType::LPDDR3) {
- precharge_offset = B + max(0, RTP - 4);
+ precharge_offset = B + max(int64_t(0), RTP - 4);
} else if (memType == MemoryType::WIDEIO_SDR) {
precharge_offset = B;
}
@@ -133,19 +127,14 @@ int MemCommand::getPrechargeOffset(const MemorySpecification& memSpec,
return precharge_offset;
} // MemCommand::getPrechargeOffset
-void MemCommand::setTime(double _timestamp)
+void MemCommand::setTime(int64_t _timestamp)
{
timestamp = _timestamp;
}
-double MemCommand::getTime() const
-{
- return timestamp;
-}
-
int64_t MemCommand::getTimeInt64() const
{
- return static_cast<int64_t>(timestamp);
+ return timestamp;
}
MemCommand::cmds MemCommand::typeWithoutAutoPrechargeFlag() const
diff --git a/ext/drampower/src/MemCommand.h b/ext/drampower/src/MemCommand.h
index ea7164577..9eb751088 100644
--- a/ext/drampower/src/MemCommand.h
+++ b/ext/drampower/src/MemCommand.h
@@ -86,17 +86,18 @@ class MemCommand {
PUP_ACT = 14,
SREN = 15,
SREX = 16,
- NOP = 17
+ NOP = 17,
+ UNINITIALIZED = 18
};
- MemCommand();
+// MemCommand();
MemCommand(
// Command Type
- MemCommand::cmds type,
+ MemCommand::cmds type = UNINITIALIZED,
// Target Bank
unsigned bank = 0,
// Command Issue Timestamp (in cc)
- double timestamp = 0);
+ int64_t timestamp = 0L);
// Get command type
cmds getType() const;
@@ -111,16 +112,15 @@ class MemCommand {
unsigned getBank() const;
// Set timestamp
- void setTime(double _timestamp);
+ void setTime(int64_t _timestamp);
// Get timestamp
- double getTime() const;
int64_t getTimeInt64() const;
cmds typeWithoutAutoPrechargeFlag() const;
// To calculate precharge offset after read or write with auto-precharge
- int getPrechargeOffset(const MemorySpecification& memSpec,
+ int64_t getPrechargeOffset(const MemorySpecification& memSpec,
MemCommand::cmds type) const;
// To check for equivalence
@@ -136,19 +136,35 @@ class MemCommand {
}
}
- static const unsigned int nCommands = 18;
+ static const unsigned int nCommands = 19;
static std::string* getCommandTypeStrings()
{
- static std::string type_map[nCommands] = { "ACT", "RD", "WR", "PRE", "REF",
- "END", "RDA", "WRA", "PREA", "PDN_F_PRE","PDN_S_PRE", "PDN_F_ACT",
- "PDN_S_ACT", "PUP_PRE", "PUP_ACT", "SREN", "SREX", "NOP" };
+ static std::string type_map[nCommands] = { "ACT",
+ "RD",
+ "WR",
+ "PRE",
+ "REF",
+ "END",
+ "RDA",
+ "WRA",
+ "PREA",
+ "PDN_F_PRE",
+ "PDN_S_PRE",
+ "PDN_F_ACT",
+ "PDN_S_ACT",
+ "PUP_PRE",
+ "PUP_ACT",
+ "SREN",
+ "SREX",
+ "NOP",
+ "UNINITIALIZED" };
return type_map;
}
// To identify command type from name
- static cmds getTypeFromName(const std::string name)
+ static cmds getTypeFromName(const std::string& name)
{
std::string* typeStrings = getCommandTypeStrings();
@@ -165,7 +181,7 @@ class MemCommand {
private:
MemCommand::cmds type;
unsigned bank;
- double timestamp;
+ int64_t timestamp;
};
}
#endif // ifndef MEMCOMMAND_H
diff --git a/ext/drampower/src/MemTimingSpec.h b/ext/drampower/src/MemTimingSpec.h
index 1c3a80c6e..104bf5c71 100644
--- a/ext/drampower/src/MemTimingSpec.h
+++ b/ext/drampower/src/MemTimingSpec.h
@@ -31,10 +31,12 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * Authors: Karthik Chandrasekar
+ * Authors: Karthik Chandrasekar, Sven Goossens
*
*/
+#include <stdint.h>
+
#include "Parametrisable.h"
namespace Data {
@@ -44,35 +46,35 @@ class MemTimingSpec : public virtual Parametrisable {
void processParameters();
double clkMhz;
- unsigned RC;
- unsigned RCD;
- unsigned CCD;
- unsigned CCD_S;
- unsigned CCD_L;
- unsigned RRD;
- unsigned RRD_S;
- unsigned RRD_L;
- unsigned FAW;
- unsigned TAW;
- unsigned WTR;
- unsigned WTR_S;
- unsigned WTR_L;
- unsigned REFI;
- unsigned RL;
- unsigned RP;
- unsigned RFC;
- unsigned RAS;
- unsigned WL;
- unsigned AL;
- unsigned DQSCK;
- unsigned RTP;
- unsigned WR;
- unsigned XP;
- unsigned XPDLL;
- unsigned XS;
- unsigned XSDLL;
- unsigned CKE;
- unsigned CKESR;
+ int64_t RC;
+ int64_t RCD;
+ int64_t CCD;
+ int64_t CCD_S;
+ int64_t CCD_L;
+ int64_t RRD;
+ int64_t RRD_S;
+ int64_t RRD_L;
+ int64_t FAW;
+ int64_t TAW;
+ int64_t WTR;
+ int64_t WTR_S;
+ int64_t WTR_L;
+ int64_t REFI;
+ int64_t RL;
+ int64_t RP;
+ int64_t RFC;
+ int64_t RAS;
+ int64_t WL;
+ int64_t AL;
+ int64_t DQSCK;
+ int64_t RTP;
+ int64_t WR;
+ int64_t XP;
+ int64_t XPDLL;
+ int64_t XS;
+ int64_t XSDLL;
+ int64_t CKE;
+ int64_t CKESR;
double clkPeriod;
};
}
diff --git a/ext/drampower/src/MemoryPowerModel.cc b/ext/drampower/src/MemoryPowerModel.cc
index 4817d1bb5..e020830e6 100644
--- a/ext/drampower/src/MemoryPowerModel.cc
+++ b/ext/drampower/src/MemoryPowerModel.cc
@@ -37,23 +37,24 @@
#include "MemoryPowerModel.h"
-#include <cmath> // For pow
-
#include <stdint.h>
+#include <cmath> // For pow
+#include <iostream> // fmtflags
+
using namespace std;
using namespace Data;
// Calculate energy and average power consumption for the given command trace
-void MemoryPowerModel::power_calc(MemorySpecification memSpec,
- const CommandAnalysis& counters,
+void MemoryPowerModel::power_calc(const MemorySpecification& memSpec,
+ const CommandAnalysis& c,
int term)
{
- MemTimingSpec& t = memSpec.memTimingSpec;
- MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
- MemPowerSpec& mps = memSpec.memPowerSpec;
+ const MemTimingSpec& t = memSpec.memTimingSpec;
+ const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
+ const MemPowerSpec& mps = memSpec.memPowerSpec;
energy.act_energy = 0.0;
energy.pre_energy = 0.0;
@@ -102,16 +103,16 @@ void MemoryPowerModel::power_calc(MemorySpecification memSpec,
// 1 DQS and 1 DM pin is associated with every data byte
int64_t dqPlusDqsPlusMaskBits = memArchSpec.width + memArchSpec.width / 8 + memArchSpec.width / 8;
// Size of one clock period for the data bus.
- double ddrPeriod = t.clkPeriod / memArchSpec.dataRate;
+ double ddrPeriod = t.clkPeriod / static_cast<double>(memArchSpec.dataRate);
// Read IO power is consumed by each DQ (data) and DQS (data strobe) pin
- energy.read_io_energy = calcIoTermEnergy(counters.numberofreads * memArchSpec.burstLength,
+ energy.read_io_energy = calcIoTermEnergy(c.numberofreads * memArchSpec.burstLength,
ddrPeriod,
power.IO_power,
dqPlusDqsBits);
// Write ODT power is consumed by each DQ (data), DQS (data strobe) and DM
- energy.write_term_energy = calcIoTermEnergy(counters.numberofwrites * memArchSpec.burstLength,
+ energy.write_term_energy = calcIoTermEnergy(c.numberofwrites * memArchSpec.burstLength,
ddrPeriod,
power.WR_ODT_power,
dqPlusDqsPlusMaskBits);
@@ -119,14 +120,14 @@ void MemoryPowerModel::power_calc(MemorySpecification memSpec,
if (memArchSpec.nbrOfRanks > 1) {
// Termination power consumed in the idle rank during reads on the active
// rank by each DQ (data) and DQS (data strobe) pin.
- energy.read_oterm_energy = calcIoTermEnergy(counters.numberofreads * memArchSpec.burstLength,
+ energy.read_oterm_energy = calcIoTermEnergy(c.numberofreads * memArchSpec.burstLength,
ddrPeriod,
power.TermRD_power,
dqPlusDqsBits);
// Termination power consumed in the idle rank during writes on the active
// rank by each DQ (data), DQS (data strobe) and DM (data mask) pin.
- energy.write_oterm_energy = calcIoTermEnergy(counters.numberofwrites * memArchSpec.burstLength,
+ energy.write_oterm_energy = calcIoTermEnergy(c.numberofwrites * memArchSpec.burstLength,
ddrPeriod,
power.TermWR_power,
dqPlusDqsPlusMaskBits);
@@ -137,101 +138,101 @@ void MemoryPowerModel::power_calc(MemorySpecification memSpec,
+ energy.read_oterm_energy + energy.write_oterm_energy;
}
- total_cycles = counters.actcycles + counters.precycles +
- counters.f_act_pdcycles + counters.f_pre_pdcycles +
- counters.s_act_pdcycles + counters.s_pre_pdcycles + counters.sref_cycles
- + counters.sref_ref_act_cycles + counters.sref_ref_pre_cycles +
- counters.spup_ref_act_cycles + counters.spup_ref_pre_cycles;
+ total_cycles = c.actcycles + c.precycles +
+ c.f_act_pdcycles + c.f_pre_pdcycles +
+ c.s_act_pdcycles + c.s_pre_pdcycles + c.sref_cycles
+ + c.sref_ref_act_cycles + c.sref_ref_pre_cycles +
+ c.spup_ref_act_cycles + c.spup_ref_pre_cycles;
EnergyDomain vdd0Domain(mps.vdd, t.clkPeriod);
- energy.act_energy = vdd0Domain.calcTivEnergy(counters.numberofacts * t.RAS , mps.idd0 - mps.idd3n);
- energy.pre_energy = vdd0Domain.calcTivEnergy(counters.numberofpres * (t.RC - t.RAS) , mps.idd0 - mps.idd2n);
- energy.read_energy = vdd0Domain.calcTivEnergy(counters.numberofreads * burstCc , mps.idd4r - mps.idd3n);
- energy.write_energy = vdd0Domain.calcTivEnergy(counters.numberofwrites * burstCc , mps.idd4w - mps.idd3n);
- energy.ref_energy = vdd0Domain.calcTivEnergy(counters.numberofrefs * t.RFC , mps.idd5 - mps.idd3n);
- energy.pre_stdby_energy = vdd0Domain.calcTivEnergy(counters.precycles, mps.idd2n);
- energy.act_stdby_energy = vdd0Domain.calcTivEnergy(counters.actcycles, mps.idd3n);
+ energy.act_energy = vdd0Domain.calcTivEnergy(c.numberofacts * t.RAS , mps.idd0 - mps.idd3n);
+ energy.pre_energy = vdd0Domain.calcTivEnergy(c.numberofpres * (t.RC - t.RAS) , mps.idd0 - mps.idd2n);
+ energy.read_energy = vdd0Domain.calcTivEnergy(c.numberofreads * burstCc , mps.idd4r - mps.idd3n);
+ energy.write_energy = vdd0Domain.calcTivEnergy(c.numberofwrites * burstCc , mps.idd4w - mps.idd3n);
+ energy.ref_energy = vdd0Domain.calcTivEnergy(c.numberofrefs * t.RFC , mps.idd5 - mps.idd3n);
+ energy.pre_stdby_energy = vdd0Domain.calcTivEnergy(c.precycles, mps.idd2n);
+ energy.act_stdby_energy = vdd0Domain.calcTivEnergy(c.actcycles, mps.idd3n);
// Idle energy in the active standby clock cycles
- energy.idle_energy_act = vdd0Domain.calcTivEnergy(counters.idlecycles_act, mps.idd3n);
+ energy.idle_energy_act = vdd0Domain.calcTivEnergy(c.idlecycles_act, mps.idd3n);
// Idle energy in the precharge standby clock cycles
- energy.idle_energy_pre = vdd0Domain.calcTivEnergy(counters.idlecycles_pre, mps.idd2n);
+ energy.idle_energy_pre = vdd0Domain.calcTivEnergy(c.idlecycles_pre, mps.idd2n);
// fast-exit active power-down cycles energy
- energy.f_act_pd_energy = vdd0Domain.calcTivEnergy(counters.f_act_pdcycles, mps.idd3p1);
+ energy.f_act_pd_energy = vdd0Domain.calcTivEnergy(c.f_act_pdcycles, mps.idd3p1);
// fast-exit precharged power-down cycles energy
- energy.f_pre_pd_energy = vdd0Domain.calcTivEnergy(counters.f_pre_pdcycles, mps.idd2p1);
+ energy.f_pre_pd_energy = vdd0Domain.calcTivEnergy(c.f_pre_pdcycles, mps.idd2p1);
// slow-exit active power-down cycles energy
- energy.s_act_pd_energy = vdd0Domain.calcTivEnergy(counters.s_act_pdcycles, mps.idd3p0);
+ energy.s_act_pd_energy = vdd0Domain.calcTivEnergy(c.s_act_pdcycles, mps.idd3p0);
// slow-exit precharged power-down cycles energy
- energy.s_pre_pd_energy = vdd0Domain.calcTivEnergy(counters.s_pre_pdcycles, mps.idd2p0);
+ energy.s_pre_pd_energy = vdd0Domain.calcTivEnergy(c.s_pre_pdcycles, mps.idd2p0);
// self-refresh cycles energy including a refresh per self-refresh entry
energy.sref_energy = engy_sref(mps.idd6, mps.idd3n,
mps.idd5, mps.vdd,
- static_cast<double>(counters.sref_cycles), static_cast<double>(counters.sref_ref_act_cycles),
- static_cast<double>(counters.sref_ref_pre_cycles), static_cast<double>(counters.spup_ref_act_cycles),
- static_cast<double>(counters.spup_ref_pre_cycles), t.clkPeriod);
+ static_cast<double>(c.sref_cycles), static_cast<double>(c.sref_ref_act_cycles),
+ static_cast<double>(c.sref_ref_pre_cycles), static_cast<double>(c.spup_ref_act_cycles),
+ static_cast<double>(c.spup_ref_pre_cycles), t.clkPeriod);
// background energy during active auto-refresh cycles in self-refresh
- energy.sref_ref_act_energy = vdd0Domain.calcTivEnergy(counters.sref_ref_act_cycles, mps.idd3p0);
+ energy.sref_ref_act_energy = vdd0Domain.calcTivEnergy(c.sref_ref_act_cycles, mps.idd3p0);
// background energy during precharged auto-refresh cycles in self-refresh
- energy.sref_ref_pre_energy = vdd0Domain.calcTivEnergy(counters.sref_ref_pre_cycles, mps.idd2p0);
+ energy.sref_ref_pre_energy = vdd0Domain.calcTivEnergy(c.sref_ref_pre_cycles, mps.idd2p0);
// background energy during active auto-refresh cycles in self-refresh exit
- energy.spup_ref_act_energy = vdd0Domain.calcTivEnergy(counters.spup_ref_act_cycles, mps.idd3n);
+ energy.spup_ref_act_energy = vdd0Domain.calcTivEnergy(c.spup_ref_act_cycles, mps.idd3n);
// background energy during precharged auto-refresh cycles in self-refresh exit
- energy.spup_ref_pre_energy = vdd0Domain.calcTivEnergy(counters.spup_ref_pre_cycles, mps.idd2n);
+ energy.spup_ref_pre_energy = vdd0Domain.calcTivEnergy(c.spup_ref_pre_cycles, mps.idd2n);
// self-refresh power-up cycles energy -- included
- energy.spup_energy = vdd0Domain.calcTivEnergy(counters.spup_cycles, mps.idd2n);
+ energy.spup_energy = vdd0Domain.calcTivEnergy(c.spup_cycles, mps.idd2n);
// active power-up cycles energy - same as active standby -- included
- energy.pup_act_energy = vdd0Domain.calcTivEnergy(counters.pup_act_cycles, mps.idd3n);
+ energy.pup_act_energy = vdd0Domain.calcTivEnergy(c.pup_act_cycles, mps.idd3n);
// precharged power-up cycles energy - same as precharged standby -- included
- energy.pup_pre_energy = vdd0Domain.calcTivEnergy(counters.pup_pre_cycles, mps.idd2n);
+ energy.pup_pre_energy = vdd0Domain.calcTivEnergy(c.pup_pre_cycles, mps.idd2n);
// similar equations as before to support multiple voltage domains in LPDDR2
// and WIDEIO memories
if (memArchSpec.twoVoltageDomains) {
EnergyDomain vdd2Domain(mps.vdd2, t.clkPeriod);
- energy.act_energy += vdd2Domain.calcTivEnergy(counters.numberofacts * t.RAS , mps.idd02 - mps.idd3n2);
- energy.pre_energy += vdd2Domain.calcTivEnergy(counters.numberofpres * (t.RC - t.RAS) , mps.idd02 - mps.idd2n2);
- energy.read_energy += vdd2Domain.calcTivEnergy(counters.numberofreads * burstCc , mps.idd4r2 - mps.idd3n2);
- energy.write_energy += vdd2Domain.calcTivEnergy(counters.numberofwrites * burstCc , mps.idd4w2 - mps.idd3n2);
- energy.ref_energy += vdd2Domain.calcTivEnergy(counters.numberofrefs * t.RFC , mps.idd52 - mps.idd3n2);
- energy.pre_stdby_energy += vdd2Domain.calcTivEnergy(counters.precycles, mps.idd2n2);
- energy.act_stdby_energy += vdd2Domain.calcTivEnergy(counters.actcycles, mps.idd3n2);
+ energy.act_energy += vdd2Domain.calcTivEnergy(c.numberofacts * t.RAS , mps.idd02 - mps.idd3n2);
+ energy.pre_energy += vdd2Domain.calcTivEnergy(c.numberofpres * (t.RC - t.RAS) , mps.idd02 - mps.idd2n2);
+ energy.read_energy += vdd2Domain.calcTivEnergy(c.numberofreads * burstCc , mps.idd4r2 - mps.idd3n2);
+ energy.write_energy += vdd2Domain.calcTivEnergy(c.numberofwrites * burstCc , mps.idd4w2 - mps.idd3n2);
+ energy.ref_energy += vdd2Domain.calcTivEnergy(c.numberofrefs * t.RFC , mps.idd52 - mps.idd3n2);
+ energy.pre_stdby_energy += vdd2Domain.calcTivEnergy(c.precycles, mps.idd2n2);
+ energy.act_stdby_energy += vdd2Domain.calcTivEnergy(c.actcycles, mps.idd3n2);
// Idle energy in the active standby clock cycles
- energy.idle_energy_act += vdd2Domain.calcTivEnergy(counters.idlecycles_act, mps.idd3n2);
+ energy.idle_energy_act += vdd2Domain.calcTivEnergy(c.idlecycles_act, mps.idd3n2);
// Idle energy in the precharge standby clock cycles
- energy.idle_energy_pre += vdd2Domain.calcTivEnergy(counters.idlecycles_pre, mps.idd2n2);
+ energy.idle_energy_pre += vdd2Domain.calcTivEnergy(c.idlecycles_pre, mps.idd2n2);
// fast-exit active power-down cycles energy
- energy.f_act_pd_energy += vdd2Domain.calcTivEnergy(counters.f_act_pdcycles, mps.idd3p12);
+ energy.f_act_pd_energy += vdd2Domain.calcTivEnergy(c.f_act_pdcycles, mps.idd3p12);
// fast-exit precharged power-down cycles energy
- energy.f_pre_pd_energy += vdd2Domain.calcTivEnergy(counters.f_pre_pdcycles, mps.idd2p12);
+ energy.f_pre_pd_energy += vdd2Domain.calcTivEnergy(c.f_pre_pdcycles, mps.idd2p12);
// slow-exit active power-down cycles energy
- energy.s_act_pd_energy += vdd2Domain.calcTivEnergy(counters.s_act_pdcycles, mps.idd3p02);
+ energy.s_act_pd_energy += vdd2Domain.calcTivEnergy(c.s_act_pdcycles, mps.idd3p02);
// slow-exit precharged power-down cycles energy
- energy.s_pre_pd_energy += vdd2Domain.calcTivEnergy(counters.s_pre_pdcycles, mps.idd2p02);
+ energy.s_pre_pd_energy += vdd2Domain.calcTivEnergy(c.s_pre_pdcycles, mps.idd2p02);
energy.sref_energy += engy_sref(mps.idd62, mps.idd3n2,
mps.idd52, mps.vdd2,
- static_cast<double>(counters.sref_cycles), static_cast<double>(counters.sref_ref_act_cycles),
- static_cast<double>(counters.sref_ref_pre_cycles), static_cast<double>(counters.spup_ref_act_cycles),
- static_cast<double>(counters.spup_ref_pre_cycles), t.clkPeriod);
+ static_cast<double>(c.sref_cycles), static_cast<double>(c.sref_ref_act_cycles),
+ static_cast<double>(c.sref_ref_pre_cycles), static_cast<double>(c.spup_ref_act_cycles),
+ static_cast<double>(c.spup_ref_pre_cycles), t.clkPeriod);
// background energy during active auto-refresh cycles in self-refresh
- energy.sref_ref_act_energy += vdd2Domain.calcTivEnergy(counters.sref_ref_act_cycles, mps.idd3p02);
+ energy.sref_ref_act_energy += vdd2Domain.calcTivEnergy(c.sref_ref_act_cycles, mps.idd3p02);
// background energy during precharged auto-refresh cycles in self-refresh
- energy.sref_ref_pre_energy += vdd2Domain.calcTivEnergy(counters.sref_ref_pre_cycles, mps.idd2p02);
+ energy.sref_ref_pre_energy += vdd2Domain.calcTivEnergy(c.sref_ref_pre_cycles, mps.idd2p02);
// background energy during active auto-refresh cycles in self-refresh exit
- energy.spup_ref_act_energy += vdd2Domain.calcTivEnergy(counters.spup_ref_act_cycles, mps.idd3n2);
+ energy.spup_ref_act_energy += vdd2Domain.calcTivEnergy(c.spup_ref_act_cycles, mps.idd3n2);
// background energy during precharged auto-refresh cycles in self-refresh exit
- energy.spup_ref_pre_energy += vdd2Domain.calcTivEnergy(counters.spup_ref_pre_cycles, mps.idd2n2);
+ energy.spup_ref_pre_energy += vdd2Domain.calcTivEnergy(c.spup_ref_pre_cycles, mps.idd2n2);
// self-refresh power-up cycles energy -- included
- energy.spup_energy += vdd2Domain.calcTivEnergy(counters.spup_cycles, mps.idd2n2);
+ energy.spup_energy += vdd2Domain.calcTivEnergy(c.spup_cycles, mps.idd2n2);
// active power-up cycles energy - same as active standby -- included
- energy.pup_act_energy += vdd2Domain.calcTivEnergy(counters.pup_act_cycles, mps.idd3n2);
+ energy.pup_act_energy += vdd2Domain.calcTivEnergy(c.pup_act_cycles, mps.idd3n2);
// precharged power-up cycles energy - same as precharged standby -- included
- energy.pup_pre_energy += vdd2Domain.calcTivEnergy(counters.pup_pre_cycles, mps.idd2n2);
+ energy.pup_pre_energy += vdd2Domain.calcTivEnergy(c.pup_pre_cycles, mps.idd2n2);
}
// auto-refresh energy during self-refresh cycles
@@ -244,7 +245,7 @@ void MemoryPowerModel::power_calc(MemorySpecification memSpec,
// energy components for both ranks (in a dual-rank system)
energy.total_energy = energy.act_energy + energy.pre_energy + energy.read_energy +
energy.write_energy + energy.ref_energy + energy.io_term_energy +
- memArchSpec.nbrOfRanks * (energy.act_stdby_energy +
+ static_cast<double>(memArchSpec.nbrOfRanks) * (energy.act_stdby_energy +
energy.pre_stdby_energy + energy.sref_energy +
energy.f_act_pd_energy + energy.f_pre_pd_energy + energy.s_act_pd_energy
+ energy.s_pre_pd_energy + energy.sref_ref_energy + energy.spup_ref_energy);
@@ -253,130 +254,100 @@ void MemoryPowerModel::power_calc(MemorySpecification memSpec,
power.average_power = energy.total_energy / (static_cast<double>(total_cycles) * t.clkPeriod);
} // MemoryPowerModel::power_calc
-void MemoryPowerModel::power_print(MemorySpecification memSpec, int term, const CommandAnalysis& counters) const
+void MemoryPowerModel::power_print(const MemorySpecification& memSpec, int term, const CommandAnalysis& c) const
{
- MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
- MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
+ const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
+ const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
+ const uint64_t nRanks = static_cast<uint64_t>(memArchSpec.nbrOfRanks);
+ const char eUnit[] = " pJ";
+ ios_base::fmtflags flags = cout.flags();
+ streamsize precision = cout.precision();
cout.precision(0);
- cout << "* Trace Details:" << endl;
- cout << "Number of Activates: " << fixed << counters.numberofacts << endl;
- cout << "Number of Reads: " << counters.numberofreads << endl;
- cout << "Number of Writes: " << counters.numberofwrites << endl;
- cout << "Number of Precharges: " << counters.numberofpres << endl;
- cout << "Number of Refreshes: " << counters.numberofrefs << endl;
- cout << "Number of Active Cycles: " << counters.actcycles << endl;
- cout << " Number of Active Idle Cycles: " << counters.idlecycles_act << endl;
- cout << " Number of Active Power-Up Cycles: " << counters.pup_act_cycles << endl;
- cout << " Number of Auto-Refresh Active cycles during Self-Refresh " <<
- "Power-Up: " << counters.spup_ref_act_cycles << endl;
- cout << "Number of Precharged Cycles: " << counters.precycles << endl;
- cout << " Number of Precharged Idle Cycles: " << counters.idlecycles_pre << endl;
- cout << " Number of Precharged Power-Up Cycles: " << counters.pup_pre_cycles
- << endl;
- cout << " Number of Auto-Refresh Precharged cycles during Self-Refresh"
- << " Power-Up: " << counters.spup_ref_pre_cycles << endl;
- cout << " Number of Self-Refresh Power-Up Cycles: " << counters.spup_cycles
- << endl;
- cout << "Total Idle Cycles (Active + Precharged): " <<
- counters.idlecycles_act + counters.idlecycles_pre << endl;
- cout << "Number of Power-Downs: " << counters.f_act_pdns +
- counters.s_act_pdns + counters.f_pre_pdns + counters.s_pre_pdns << endl;
- cout << " Number of Active Fast-exit Power-Downs: " << counters.f_act_pdns
- << endl;
- cout << " Number of Active Slow-exit Power-Downs: " << counters.s_act_pdns
- << endl;
- cout << " Number of Precharged Fast-exit Power-Downs: " <<
- counters.f_pre_pdns << endl;
- cout << " Number of Precharged Slow-exit Power-Downs: " <<
- counters.s_pre_pdns << endl;
- cout << "Number of Power-Down Cycles: " << counters.f_act_pdcycles +
- counters.s_act_pdcycles + counters.f_pre_pdcycles + counters.s_pre_pdcycles << endl;
- cout << " Number of Active Fast-exit Power-Down Cycles: " <<
- counters.f_act_pdcycles << endl;
- cout << " Number of Active Slow-exit Power-Down Cycles: " <<
- counters.s_act_pdcycles << endl;
- cout << " Number of Auto-Refresh Active cycles during Self-Refresh: " <<
- counters.sref_ref_act_cycles << endl;
- cout << " Number of Precharged Fast-exit Power-Down Cycles: " <<
- counters.f_pre_pdcycles << endl;
- cout << " Number of Precharged Slow-exit Power-Down Cycles: " <<
- counters.s_pre_pdcycles << endl;
- cout << " Number of Auto-Refresh Precharged cycles during Self-Refresh: " <<
- counters.sref_ref_pre_cycles << endl;
- cout << "Number of Auto-Refresh Cycles: " << counters.numberofrefs *
- memTimingSpec.RFC << endl;
- cout << "Number of Self-Refreshes: " << counters.numberofsrefs << endl;
- cout << "Number of Self-Refresh Cycles: " << counters.sref_cycles << endl;
- cout << "----------------------------------------" << endl;
- cout << "Total Trace Length (clock cycles): " << total_cycles << endl;
- cout << "----------------------------------------" << endl;
+ cout << "* Trace Details:" << fixed << endl
+ << endl << "#ACT commands: " << c.numberofacts
+ << endl << "#RD + #RDA commands: " << c.numberofreads
+ << endl << "#WR + #WRA commands: " << c.numberofwrites
+ /* #PRE commands (precharge all counts a number of #PRE commands equal to the number of active banks) */
+ << endl << "#PRE (+ PREA) commands: " << c.numberofpres
+ << endl << "#REF commands: " << c.numberofrefs
+ << endl << "#Active Cycles: " << c.actcycles
+ << endl << " #Active Idle Cycles: " << c.idlecycles_act
+ << endl << " #Active Power-Up Cycles: " << c.pup_act_cycles
+ << endl << " #Auto-Refresh Active cycles during Self-Refresh Power-Up: " << c.spup_ref_act_cycles
+ << endl << "#Precharged Cycles: " << c.precycles
+ << endl << " #Precharged Idle Cycles: " << c.idlecycles_pre
+ << endl << " #Precharged Power-Up Cycles: " << c.pup_pre_cycles
+ << endl << " #Auto-Refresh Precharged cycles during Self-Refresh Power-Up: " << c.spup_ref_pre_cycles
+ << endl << " #Self-Refresh Power-Up Cycles: " << c.spup_cycles
+ << endl << "Total Idle Cycles (Active + Precharged): " << c.idlecycles_act + c.idlecycles_pre
+ << endl << "#Power-Downs: " << c.f_act_pdns + c.s_act_pdns + c.f_pre_pdns + c.s_pre_pdns
+ << endl << " #Active Fast-exit Power-Downs: " << c.f_act_pdns
+ << endl << " #Active Slow-exit Power-Downs: " << c.s_act_pdns
+ << endl << " #Precharged Fast-exit Power-Downs: " << c.f_pre_pdns
+ << endl << " #Precharged Slow-exit Power-Downs: " << c.s_pre_pdns
+ << endl << "#Power-Down Cycles: " << c.f_act_pdcycles + c.s_act_pdcycles + c.f_pre_pdcycles + c.s_pre_pdcycles
+ << endl << " #Active Fast-exit Power-Down Cycles: " << c.f_act_pdcycles
+ << endl << " #Active Slow-exit Power-Down Cycles: " << c.s_act_pdcycles
+ << endl << " #Auto-Refresh Active cycles during Self-Refresh: " << c.sref_ref_act_cycles
+ << endl << " #Precharged Fast-exit Power-Down Cycles: " << c.f_pre_pdcycles
+ << endl << " #Precharged Slow-exit Power-Down Cycles: " << c.s_pre_pdcycles
+ << endl << " #Auto-Refresh Precharged cycles during Self-Refresh: " << c.sref_ref_pre_cycles
+ << endl << "#Auto-Refresh Cycles: " << c.numberofrefs * memTimingSpec.RFC
+ << endl << "#Self-Refreshes: " << c.numberofsrefs
+ << endl << "#Self-Refresh Cycles: " << c.sref_cycles
+ << endl << "----------------------------------------"
+ << endl << "Total Trace Length (clock cycles): " << total_cycles
+ << endl << "----------------------------------------" << endl;
+
cout.precision(2);
+ cout << endl << "* Trace Power and Energy Estimates:" << endl
+ << endl << "ACT Cmd Energy: " << energy.act_energy << eUnit
+ << endl << "PRE Cmd Energy: " << energy.pre_energy << eUnit
+ << endl << "RD Cmd Energy: " << energy.read_energy << eUnit
+ << endl << "WR Cmd Energy: " << energy.write_energy << eUnit;
- cout << "\n* Trace Power and Energy Estimates:" << endl;
- cout << "ACT Cmd Energy: " << energy.act_energy << " pJ" << endl;
- cout << "PRE Cmd Energy: " << energy.pre_energy << " pJ" << endl;
- cout << "RD Cmd Energy: " << energy.read_energy << " pJ" << endl;
- cout << "WR Cmd Energy: " << energy.write_energy << " pJ" << endl;
if (term) {
- cout << "RD I/O Energy: " << energy.read_io_energy << " pJ" << endl;
+ cout << "RD I/O Energy: " << energy.read_io_energy << eUnit << endl;
// No Termination for LPDDR/2/3 and DDR memories
if (memSpec.memArchSpec.termination) {
- cout << "WR Termination Energy: " << energy.write_term_energy << " pJ" << endl;
+ cout << "WR Termination Energy: " << energy.write_term_energy << eUnit << endl;
}
- if ((memArchSpec.nbrOfRanks > 1) && memSpec.memArchSpec.termination) {
- cout << "RD Termination Energy (Idle rank): " << energy.read_oterm_energy
- << " pJ" << endl;
- cout << "WR Termination Energy (Idle rank): " << energy.write_oterm_energy
- << " pJ" << endl;
+ if (nRanks > 1 && memSpec.memArchSpec.termination) {
+ cout << "RD Termination Energy (Idle rank): " << energy.read_oterm_energy << eUnit
+ << endl << "WR Termination Energy (Idle rank): " << energy.write_oterm_energy << eUnit << endl;
}
}
- cout << "ACT Stdby Energy: " << memArchSpec.nbrOfRanks * energy.act_stdby_energy <<
- " pJ" << endl;
- cout << " Active Idle Energy: " << memArchSpec.nbrOfRanks * energy.idle_energy_act <<
- " pJ" << endl;
- cout << " Active Power-Up Energy: " << memArchSpec.nbrOfRanks * energy.pup_act_energy <<
- " pJ" << endl;
- cout << " Active Stdby Energy during Auto-Refresh cycles in Self-Refresh"
- << " Power-Up: " << memArchSpec.nbrOfRanks * energy.spup_ref_act_energy <<
- " pJ" << endl;
- cout << "PRE Stdby Energy: " << memArchSpec.nbrOfRanks * energy.pre_stdby_energy <<
- " pJ" << endl;
- cout << " Precharge Idle Energy: " << memArchSpec.nbrOfRanks * energy.idle_energy_pre <<
- " pJ" << endl;
- cout << " Precharged Power-Up Energy: " << memArchSpec.nbrOfRanks * energy.pup_pre_energy <<
- " pJ" << endl;
- cout << " Precharge Stdby Energy during Auto-Refresh cycles " <<
- "in Self-Refresh Power-Up: " << memArchSpec.nbrOfRanks * energy.spup_ref_pre_energy <<
- " pJ" << endl;
- cout << " Self-Refresh Power-Up Energy: " << memArchSpec.nbrOfRanks * energy.spup_energy <<
- " pJ" << endl;
- cout << "Total Idle Energy (Active + Precharged): " << memArchSpec.nbrOfRanks *
- (energy.idle_energy_act + energy.idle_energy_pre) << " pJ" << endl;
- cout << "Total Power-Down Energy: " << memArchSpec.nbrOfRanks * (energy.f_act_pd_energy +
- energy.f_pre_pd_energy + energy.s_act_pd_energy + energy.s_pre_pd_energy) << " pJ" << endl;
- cout << " Fast-Exit Active Power-Down Energy: " << memArchSpec.nbrOfRanks *
- energy.f_act_pd_energy << " pJ" << endl;
- cout << " Slow-Exit Active Power-Down Energy: " << memArchSpec.nbrOfRanks *
- energy.s_act_pd_energy << " pJ" << endl;
- cout << " Slow-Exit Active Power-Down Energy during Auto-Refresh cycles "
- << "in Self-Refresh: " << memArchSpec.nbrOfRanks * energy.sref_ref_act_energy <<
- " pJ" << endl;
- cout << " Fast-Exit Precharged Power-Down Energy: " << memArchSpec.nbrOfRanks *
- energy.f_pre_pd_energy << " pJ" << endl;
- cout << " Slow-Exit Precharged Power-Down Energy: " << memArchSpec.nbrOfRanks *
- energy.s_pre_pd_energy << " pJ" << endl;
- cout << " Slow-Exit Precharged Power-Down Energy during Auto-Refresh " <<
- "cycles in Self-Refresh: " << memArchSpec.nbrOfRanks * energy.sref_ref_pre_energy <<
- " pJ" << endl;
- cout << "Auto-Refresh Energy: " << energy.ref_energy << " pJ" << endl;
- cout << "Self-Refresh Energy: " << memArchSpec.nbrOfRanks * energy.sref_energy <<
- " pJ" << endl;
- cout << "----------------------------------------" << endl;
- cout << "Total Trace Energy: " << energy.total_energy << " pJ" << endl;
- cout << "Average Power: " << power.average_power << " mW" << endl;
- cout << "----------------------------------------" << endl;
+
+ double nRanksDouble = static_cast<double>(nRanks);
+
+ cout << "ACT Stdby Energy: " << nRanksDouble * energy.act_stdby_energy << eUnit
+ << endl << " Active Idle Energy: " << nRanksDouble * energy.idle_energy_act << eUnit
+ << endl << " Active Power-Up Energy: " << nRanksDouble * energy.pup_act_energy << eUnit
+ << endl << " Active Stdby Energy during Auto-Refresh cycles in Self-Refresh Power-Up: " << nRanksDouble * energy.spup_ref_act_energy << eUnit
+ << endl << "PRE Stdby Energy: " << nRanksDouble * energy.pre_stdby_energy << eUnit
+ << endl << " Precharge Idle Energy: " << nRanksDouble * energy.idle_energy_pre << eUnit
+ << endl << " Precharged Power-Up Energy: " << nRanksDouble * energy.pup_pre_energy << eUnit
+ << endl << " Precharge Stdby Energy during Auto-Refresh cycles in Self-Refresh Power-Up: " << nRanksDouble * energy.spup_ref_pre_energy << eUnit
+ << endl << " Self-Refresh Power-Up Energy: " << nRanksDouble * energy.spup_energy << eUnit
+ << endl << "Total Idle Energy (Active + Precharged): " << nRanksDouble * (energy.idle_energy_act + energy.idle_energy_pre) << eUnit
+ << endl << "Total Power-Down Energy: " << nRanksDouble * (energy.f_act_pd_energy + energy.f_pre_pd_energy + energy.s_act_pd_energy + energy.s_pre_pd_energy) << eUnit
+ << endl << " Fast-Exit Active Power-Down Energy: " << nRanksDouble * energy.f_act_pd_energy << eUnit
+ << endl << " Slow-Exit Active Power-Down Energy: " << nRanksDouble * energy.s_act_pd_energy << eUnit
+ << endl << " Slow-Exit Active Power-Down Energy during Auto-Refresh cycles in Self-Refresh: " << nRanksDouble * energy.sref_ref_act_energy << eUnit
+ << endl << " Fast-Exit Precharged Power-Down Energy: " << nRanksDouble * energy.f_pre_pd_energy << eUnit
+ << endl << " Slow-Exit Precharged Power-Down Energy: " << nRanksDouble * energy.s_pre_pd_energy << eUnit
+ << endl << " Slow-Exit Precharged Power-Down Energy during Auto-Refresh cycles in Self-Refresh: " << nRanksDouble * energy.sref_ref_pre_energy << eUnit
+ << endl << "Auto-Refresh Energy: " << energy.ref_energy << eUnit
+ << endl << "Self-Refresh Energy: " << nRanksDouble * energy.sref_energy << eUnit
+ << endl << "----------------------------------------"
+ << endl << "Total Trace Energy: " << energy.total_energy << eUnit
+ << endl << "Average Power: " << power.average_power << " mW"
+ << endl << "----------------------------------------" << endl;
+
+ cout.flags(flags);
+ cout.precision(precision);
} // MemoryPowerModel::power_print
// Self-refresh active energy estimation (not including background energy)
@@ -395,11 +366,11 @@ double MemoryPowerModel::engy_sref(double idd6, double idd3n, double idd5,
// IO and Termination power calculation based on Micron Power Calculators
// Absolute power measures are obtained from Micron Power Calculator (mentioned in mW)
-void MemoryPowerModel::io_term_power(MemorySpecification memSpec)
+void MemoryPowerModel::io_term_power(const MemorySpecification& memSpec)
{
- MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
- MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
- MemPowerSpec& memPowerSpec = memSpec.memPowerSpec;
+ const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec;
+ const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec;
+ const MemPowerSpec& memPowerSpec = memSpec.memPowerSpec;
power.IO_power = memPowerSpec.ioPower; // in mW
power.WR_ODT_power = memPowerSpec.wrOdtPower; // in mW
diff --git a/ext/drampower/src/MemoryPowerModel.h b/ext/drampower/src/MemoryPowerModel.h
index b894f67dd..2b2304989 100644
--- a/ext/drampower/src/MemoryPowerModel.h
+++ b/ext/drampower/src/MemoryPowerModel.h
@@ -46,9 +46,9 @@ class MemoryPowerModel {
public:
// Calculate energy and average power consumption for the given memory
// command trace
- void power_calc(MemorySpecification memSpec,
- const CommandAnalysis& counters,
- int term);
+ void power_calc(const MemorySpecification& memSpec,
+ const CommandAnalysis& c,
+ int term);
// Used to calculate self-refresh active energy
static double engy_sref(double idd6,
@@ -145,12 +145,12 @@ class MemoryPowerModel {
};
// Print the power and energy
- void power_print(MemorySpecification memSpec,
+ void power_print(const MemorySpecification& memSpec,
int term,
- const CommandAnalysis& counters) const;
+ const CommandAnalysis& c) const;
// To derive IO and Termination Power measures using DRAM specification
- void io_term_power(MemorySpecification memSpec);
+ void io_term_power(const MemorySpecification& memSpec);
Energy energy;
Power power;
diff --git a/ext/drampower/src/MemorySpecification.h b/ext/drampower/src/MemorySpecification.h
index 149d41c28..16d77ef86 100644
--- a/ext/drampower/src/MemorySpecification.h
+++ b/ext/drampower/src/MemorySpecification.h
@@ -106,7 +106,8 @@ class MemoryType {
return val == LPDDR ||
val == LPDDR2 ||
val == LPDDR3 ||
- val == WIDEIO_SDR;
+ val == WIDEIO_SDR ||
+ val == DDR4;
}
bool isDDRFamily() const
@@ -132,9 +133,11 @@ class MemoryType {
double getCapacitance() const
{
- // LPDDR/2/3 and DDR memories only have IO Power (no ODT)
- // Conservative estimates based on Micron Mobile LPDDR2 Power Calculator
- // LPDDR/2/3 IO Capacitance in mF
+ // LPDDR1/2 memories only have IO Power (no ODT)
+ // LPDDR3 has optional ODT, but it is typically not used (reflections are eliminated by other means (layout))
+ // The capacitance values are conservative and based on Micron Mobile LPDDR2 Power Calculator
+
+ // LPDDR/2/3 IO Capacitance in mF
if (val == LPDDR) {
return 0.0000000045;
} else if (val == LPDDR2) {
diff --git a/ext/drampower/src/TraceParser.cc b/ext/drampower/src/TraceParser.cc
index ec87f06da..2cf9a8572 100644
--- a/ext/drampower/src/TraceParser.cc
+++ b/ext/drampower/src/TraceParser.cc
@@ -42,14 +42,19 @@
using namespace Data;
using namespace std;
+TraceParser::TraceParser(int64_t nbrOfBanks) :
+ counters(nbrOfBanks)
+{
+}
+
+
Data::MemCommand TraceParser::parseLine(std::string line)
{
- MemCommand memcmd;
+ MemCommand memcmd(MemCommand::UNINITIALIZED, 0, 0);
istringstream linestream(line);
string item;
- double item_val;
+ int64_t item_val;
unsigned itemnum = 0;
- MemCommand::cmds type = MemCommand::NOP; // Initialized to prevent warning
while (getline(linestream, item, ',')) {
if (itemnum == 0) {
@@ -62,10 +67,8 @@ Data::MemCommand TraceParser::parseLine(std::string line)
} else if (itemnum == 2) {
stringstream bank(item);
bank >> item_val;
- memcmd.setType(type);
memcmd.setBank(static_cast<unsigned>(item_val));
}
- type = memcmd.getType();
itemnum++;
}
return memcmd;
@@ -90,13 +93,13 @@ void TraceParser::parseFile(MemorySpecification memSpec, std::ifstream& trace,
cmd_list.push_back(cmdline);
nCommands++;
if (nCommands == window) {
- counters.getCommands(memSpec, memSpec.memArchSpec.nbrOfBanks, cmd_list, lastupdate);
+ counters.getCommands(memSpec, cmd_list, lastupdate);
nCommands = 0;
cmd_list.clear();
}
}
lastupdate = true;
- counters.getCommands(memSpec, memSpec.memArchSpec.nbrOfBanks, cmd_list, lastupdate);
+ counters.getCommands(memSpec, cmd_list, lastupdate);
cmd_list.clear();
pwr_trace.close();
} else {
@@ -106,13 +109,13 @@ void TraceParser::parseFile(MemorySpecification memSpec, std::ifstream& trace,
cmd_list.push_back(cmdline);
nCommands++;
if (nCommands == window) {
- counters.getCommands(memSpec, memSpec.memArchSpec.nbrOfBanks, cmd_list, lastupdate);
+ counters.getCommands(memSpec, cmd_list, lastupdate);
nCommands = 0;
cmd_list.clear();
}
}
lastupdate = true;
- counters.getCommands(memSpec, memSpec.memArchSpec.nbrOfBanks, cmd_list, lastupdate);
+ counters.getCommands(memSpec, cmd_list, lastupdate);
cmd_list.clear();
}
counters.clear();
diff --git a/ext/drampower/src/TraceParser.h b/ext/drampower/src/TraceParser.h
index cabfcd395..9727b4800 100644
--- a/ext/drampower/src/TraceParser.h
+++ b/ext/drampower/src/TraceParser.h
@@ -48,6 +48,7 @@
class TraceParser {
public:
+ TraceParser(int64_t nbrOfBanks);
// list of parsed commands
std::vector<Data::MemCommand> cmd_list;
diff --git a/ext/drampower/src/Utils.h b/ext/drampower/src/Utils.h
index 4aa8bb220..80f4390c7 100644
--- a/ext/drampower/src/Utils.h
+++ b/ext/drampower/src/Utils.h
@@ -41,9 +41,7 @@
#include <string>
#include <sstream>
#include <stdexcept>
-#include <typeinfo>
-#define MILLION 1000000
template<typename T>
T fromString(const std::string& s,
@@ -54,7 +52,7 @@ throw(std::runtime_error)
T t;
if (!(is >> f >> t)) {
- throw std::runtime_error("fromString cannot convert " + s);
+ throw std::runtime_error("Cannot convert string");
}
return t;
diff --git a/ext/drampower/src/libdrampower/LibDRAMPower.cc b/ext/drampower/src/libdrampower/LibDRAMPower.cc
index ac16f948b..47ed15a99 100644
--- a/ext/drampower/src/libdrampower/LibDRAMPower.cc
+++ b/ext/drampower/src/libdrampower/LibDRAMPower.cc
@@ -52,13 +52,13 @@ libDRAMPower::~libDRAMPower()
void libDRAMPower::doCommand(MemCommand::cmds type, int bank, int64_t timestamp)
{
- MemCommand cmd(type, static_cast<unsigned>(bank), static_cast<double>(timestamp));
+ MemCommand cmd(type, static_cast<unsigned>(bank), timestamp);
cmdList.push_back(cmd);
}
void libDRAMPower::updateCounters(bool lastUpdate)
{
- counters.getCommands(memSpec, memSpec.memArchSpec.nbrOfBanks, cmdList, lastUpdate);
+ counters.getCommands(memSpec, cmdList, lastUpdate);
cmdList.clear();
}
@@ -72,6 +72,11 @@ void libDRAMPower::clearState()
counters.clear();
}
+void libDRAMPower::clearCounters(int64_t timestamp)
+{
+ counters.clearStats(timestamp);
+}
+
const Data::MemoryPowerModel::Energy& libDRAMPower::getEnergy() const
{
return mpm.energy;
diff --git a/ext/drampower/src/libdrampower/LibDRAMPower.h b/ext/drampower/src/libdrampower/LibDRAMPower.h
index 9dea8b0f5..4d9ccefe5 100644
--- a/ext/drampower/src/libdrampower/LibDRAMPower.h
+++ b/ext/drampower/src/libdrampower/LibDRAMPower.h
@@ -56,6 +56,8 @@ class libDRAMPower {
void updateCounters(bool lastUpdate);
+ void clearCounters(int64_t timestamp);
+
void clearState();
void calcEnergy();
diff --git a/ext/drampower/test/libdrampowertest/lib_test.cc b/ext/drampower/test/libdrampowertest/lib_test.cc
index f382a727e..20d4d9ebf 100644
--- a/ext/drampower/test/libdrampowertest/lib_test.cc
+++ b/ext/drampower/test/libdrampowertest/lib_test.cc
@@ -79,7 +79,8 @@ int main(int argc, char* argv[])
test.doCommand(MemCommand::RDA,0,210);
test.doCommand(MemCommand::ACT,4,232);
test.doCommand(MemCommand::WRA,4,247);
- test.doCommand(MemCommand::PDN_F_ACT,3,248);
+ // Need at least tWRAPDEN = AL + CWL + BL/2 + WR + 1 cycles between WR and PDN_F_PRE
+ test.doCommand(MemCommand::PDN_F_PRE,3,265);
//set bool to true when this is the last update of the counters
test.updateCounters(true);
@@ -106,7 +107,7 @@ int main(int argc, char* argv[])
//test.getEnergy().act_stdby_energy
std::cout << "ACT Std Energy" << "\t" << test.getEnergy().act_stdby_energy << endl;
//total active standby energy for both ranks
- std::cout << "ACT Std Energy total ranks" << "\t" << memSpec.memArchSpec.nbrOfRanks *
+ std::cout << "ACT Std Energy total ranks" << "\t" << static_cast<double>(memSpec.memArchSpec.nbrOfRanks) *
test.getEnergy().act_stdby_energy << "\n" ;
std::cout << "PRE Std Energy" << "\t" << test.getEnergy().pre_stdby_energy << endl;
std::cout << "Total Energy" << "\t" << test.getEnergy().total_energy << endl;
diff --git a/src/mem/SerialLink.py b/src/mem/SerialLink.py
index f05f2872d..fd9b0ff6b 100644
--- a/src/mem/SerialLink.py
+++ b/src/mem/SerialLink.py
@@ -61,3 +61,5 @@ class SerialLink(MemObject):
# link belongs to and the number of lanes:
num_lanes = Param.Unsigned(1, "Number of parallel lanes inside the serial"
"link. (aka. lane width)")
+ link_speed = Param.UInt64(1, "Gb/s Speed of each parallel lane inside the "
+ "serial link. (aka. lane speed)")
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh
index 6cd72b266..f59528492 100644
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -41,6 +41,7 @@
* Ani Udipi
* Neha Agarwal
* Omar Naji
+ * Matthias Jung
*/
/**
@@ -862,7 +863,7 @@ class DRAMCtrl : public AbstractMemory
*/
static bool sortTime(const Data::MemCommand& m1,
const Data::MemCommand& m2) {
- return m1.getTime() < m2.getTime();
+ return m1.getTimeInt64() < m2.getTimeInt64();
};
diff --git a/src/mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.py b/src/mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.py
index c7833ee96..5a4f3026e 100644
--- a/src/mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.py
+++ b/src/mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.py
@@ -53,19 +53,20 @@ class GarnetIntLink_d(BasicIntLink):
cxx_header = "mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.hh"
# The detailed fixed pipeline bi-directional link include two main
# forward links and two backward flow-control links, one per direction
- nls = []
+ _nls = []
# In uni-directional link
- nls.append(NetworkLink_d());
+ _nls.append(NetworkLink_d());
# Out uni-directional link
- nls.append(NetworkLink_d());
- network_links = VectorParam.NetworkLink_d(nls, "forward links")
+ _nls.append(NetworkLink_d());
+ network_links = VectorParam.NetworkLink_d(_nls, "forward links")
- cls = []
+ _cls = []
# In uni-directional link
- cls.append(CreditLink_d());
+ _cls.append(CreditLink_d());
# Out uni-directional link
- cls.append(CreditLink_d());
- credit_links = VectorParam.CreditLink_d(cls, "backward flow-control links")
+ _cls.append(CreditLink_d());
+ credit_links = VectorParam.CreditLink_d(_cls,
+ "backward flow-control links")
# Exterior fixed pipeline links between a router and a controller
class GarnetExtLink_d(BasicExtLink):
@@ -73,16 +74,17 @@ class GarnetExtLink_d(BasicExtLink):
cxx_header = "mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.hh"
# The detailed fixed pipeline bi-directional link include two main
# forward links and two backward flow-control links, one per direction
- nls = []
+ _nls = []
# In uni-directional link
- nls.append(NetworkLink_d());
+ _nls.append(NetworkLink_d());
# Out uni-directional link
- nls.append(NetworkLink_d());
- network_links = VectorParam.NetworkLink_d(nls, "forward links")
+ _nls.append(NetworkLink_d());
+ network_links = VectorParam.NetworkLink_d(_nls, "forward links")
- cls = []
+ _cls = []
# In uni-directional link
- cls.append(CreditLink_d());
+ _cls.append(CreditLink_d());
# Out uni-directional link
- cls.append(CreditLink_d());
- credit_links = VectorParam.CreditLink_d(cls, "backward flow-control links")
+ _cls.append(CreditLink_d());
+ credit_links = VectorParam.CreditLink_d(_cls,
+ "backward flow-control links")
diff --git a/src/mem/serial_link.cc b/src/mem/serial_link.cc
index b6cb097b7..25f5291bb 100644
--- a/src/mem/serial_link.cc
+++ b/src/mem/serial_link.cc
@@ -87,7 +87,9 @@ SerialLink::SerialLink(SerialLinkParams *p)
ticksToCycles(p->delay), p->resp_size, p->ranges),
masterPort(p->name + ".master", *this, slavePort,
ticksToCycles(p->delay), p->req_size),
- num_lanes(p->num_lanes)
+ num_lanes(p->num_lanes),
+ link_speed(p->link_speed)
+
{
}
@@ -153,8 +155,9 @@ SerialLink::SerialLinkMasterPort::recvTimingResp(PacketPtr pkt)
// have to wait to receive the whole packet. So we only account for the
// deserialization latency.
Cycles cycles = delay;
- cycles += Cycles(divCeil(pkt->getSize() * 8, serial_link.num_lanes));
- Tick t = serial_link.clockEdge(cycles);
+ cycles += Cycles(divCeil(pkt->getSize() * 8, serial_link.num_lanes
+ * serial_link.link_speed));
+ Tick t = serial_link.clockEdge(cycles);
//@todo: If the processor sends two uncached requests towards HMC and the
// second one is smaller than the first one. It may happen that the second
@@ -214,7 +217,7 @@ SerialLink::SerialLinkSlavePort::recvTimingReq(PacketPtr pkt)
// only.
Cycles cycles = delay;
cycles += Cycles(divCeil(pkt->getSize() * 8,
- serial_link.num_lanes));
+ serial_link.num_lanes * serial_link.link_speed));
Tick t = serial_link.clockEdge(cycles);
//@todo: If the processor sends two uncached requests towards HMC
@@ -301,7 +304,7 @@ SerialLink::SerialLinkMasterPort::trySendTiming()
// Make sure bandwidth limitation is met
Cycles cycles = Cycles(divCeil(pkt->getSize() * 8,
- serial_link.num_lanes));
+ serial_link.num_lanes * serial_link.link_speed));
Tick t = serial_link.clockEdge(cycles);
serial_link.schedule(sendEvent, std::max(next_req.tick, t));
}
@@ -346,7 +349,7 @@ SerialLink::SerialLinkSlavePort::trySendTiming()
// Make sure bandwidth limitation is met
Cycles cycles = Cycles(divCeil(pkt->getSize() * 8,
- serial_link.num_lanes));
+ serial_link.num_lanes * serial_link.link_speed));
Tick t = serial_link.clockEdge(cycles);
serial_link.schedule(sendEvent, std::max(next_resp.tick, t));
}
diff --git a/src/mem/serial_link.hh b/src/mem/serial_link.hh
index d4f6ca488..9fbcce335 100644
--- a/src/mem/serial_link.hh
+++ b/src/mem/serial_link.hh
@@ -312,6 +312,9 @@ class SerialLink : public MemObject
/** Number of parallel lanes in this serial link */
unsigned num_lanes;
+ /** Speed of each link (Gb/s) in this serial link */
+ uint64_t link_speed;
+
public:
virtual BaseMasterPort& getMasterPort(const std::string& if_name,
diff --git a/tests/quick/se/70.tgen/traffic.cfg b/tests/quick/se/70.tgen/traffic.cfg
new file mode 100644
index 000000000..88e642553
--- /dev/null
+++ b/tests/quick/se/70.tgen/traffic.cfg
@@ -0,0 +1,7 @@
+STATE 0 10000 RANDOM 100 0 134217727 256 1000 1000 0
+STATE 1 1000000 TRACE tests/quick/se/70.tgen/tgen-simple-mem.trc 100
+STATE 2 1000 IDLE
+INIT 0
+TRANSITION 0 1 1
+TRANSITION 1 2 1
+TRANSITION 2 0 1
diff --git a/util/systemc/main.cc b/util/systemc/main.cc
index 75a77853b..c9fbd48a0 100644
--- a/util/systemc/main.cc
+++ b/util/systemc/main.cc
@@ -74,6 +74,9 @@
#include "sc_module.hh"
#include "stats.hh"
+// Defining global string variable declared in stats.hh
+std::string filename;
+
void
usage(const std::string &prog_name)
{
@@ -289,7 +292,7 @@ void SimControl::run()
std::cerr << "Waiting for " << wait_period << "ps for"
" SystemC to catch up to gem5\n";
- wait(sc_core::sc_time(wait_period, sc_core::SC_PS));
+ wait(sc_core::sc_time::from_value(wait_period));
}
config_manager->loadState(*checkpoint);
@@ -383,7 +386,11 @@ sc_main(int argc, char **argv)
{
SimControl sim_control("gem5", argc, argv);
+ filename = "m5out/stats-systemc.txt";
+
sc_core::sc_start();
+ CxxConfig::statsDump();
+
return EXIT_SUCCESS;
}
diff --git a/util/systemc/stats.cc b/util/systemc/stats.cc
index ef5d9b5d3..54d149474 100644
--- a/util/systemc/stats.cc
+++ b/util/systemc/stats.cc
@@ -35,6 +35,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Andrew Bardsley
+ * Matthias Jung
+ * Abdul Mutaal Ahmad
*/
/**
@@ -45,7 +47,9 @@
* Register with: Stats::registerHandlers(statsReset, statsDump)
*/
+#include "base/output.hh"
#include "base/statistics.hh"
+#include "base/stats/text.hh"
#include "stats.hh"
namespace CxxConfig
@@ -56,45 +60,76 @@ void statsPrepare()
std::list<Stats::Info *> stats = Stats::statsList();
/* gather_stats -> prepare */
- for (auto i = stats.begin(); i != stats.end(); ++i)
- (*i)->prepare();
+ for (auto i = stats.begin(); i != stats.end(); ++i){
+ Stats::Info *stat = *i;
+ Stats::VectorInfo *vector = dynamic_cast<Stats::VectorInfo *>(stat);
+ if (vector){
+ (dynamic_cast<Stats::VectorInfo *>(*i))->prepare();
+ }
+ else {
+ (*i)->prepare();
+ }
+
+ }
}
void statsDump()
{
- std::cerr << "Stats dump\n";
+ bool desc = true;
+ Stats::Output *output = Stats::initText(filename, desc);
Stats::processDumpQueue();
std::list<Stats::Info *> stats = Stats::statsList();
+ statsEnable();
statsPrepare();
+ output->begin();
/* gather_stats -> convert_value */
for (auto i = stats.begin(); i != stats.end(); ++i) {
Stats::Info *stat = *i;
- Stats::ScalarInfo *scalar = dynamic_cast<Stats::ScalarInfo *>(stat);
+ const Stats::ScalarInfo *scalar = dynamic_cast<Stats::ScalarInfo
+ *>(stat);
Stats::VectorInfo *vector = dynamic_cast<Stats::VectorInfo *>(stat);
-
- if (scalar) {
- std::cerr << "SCALAR " << stat->name << ' '
- << scalar->value() << '\n';
- } else if (vector) {
- Stats::VResult results = vector->value();
-
- unsigned int index = 0;
- for (auto e = results.begin(); e != results.end(); ++e) {
- std::cerr << "VECTOR " << stat->name << '[' << index
- << "] " << (*e) << '\n';
- index++;
+ const Stats::Vector2dInfo *vector2d = dynamic_cast<Stats::Vector2dInfo
+ *>(vector);
+ const Stats::DistInfo *dist = dynamic_cast<Stats::DistInfo *>(stat);
+ const Stats::VectorDistInfo *vectordist =
+ dynamic_cast<Stats::VectorDistInfo *>(stat);
+ const Stats::SparseHistInfo *sparse =
+ dynamic_cast<Stats::SparseHistInfo *>(stat);
+ const Stats::InfoProxy <Stats::Vector2d,Stats::Vector2dInfo> *info =
+ dynamic_cast<Stats::InfoProxy
+ <Stats::Vector2d,Stats::Vector2dInfo>*>(stat);
+
+ if (vector) {
+ const Stats::FormulaInfo *formula = dynamic_cast<Stats::FormulaInfo
+ *>(vector);
+ if (formula){
+ output->visit(*formula);
+ } else {
+ const Stats::VectorInfo *vector1 = vector;
+ output->visit(*vector1);
}
- std::cerr << "VTOTAL " << stat->name << ' '
- << vector->total() << '\n';
+ } else if (vector2d) {
+ output->visit(*vector2d);
+ } else if (info){
+ output->visit(*info);
+ } else if (vectordist){
+ output->visit(*vectordist);
+ } else if (dist) {
+ output->visit(*dist);
+ } else if (sparse) {
+ output->visit(*sparse);
+ } else if (scalar) {
+ output->visit(*scalar);
} else {
- std::cerr << "?????? " << stat->name << '\n';
+ warn("Stat not dumped: %s\n", stat->name);
}
}
+ output->end();
}
void statsReset()
@@ -108,8 +143,17 @@ void statsEnable()
{
std::list<Stats::Info *> stats = Stats::statsList();
- for (auto i = stats.begin(); i != stats.end(); ++i)
- (*i)->enable();
+ for (auto i = stats.begin(); i != stats.end(); ++i){
+ Stats::Info *stat = *i;
+ Stats::VectorInfo *vector = dynamic_cast<Stats::VectorInfo *>(stat);
+ if (vector){
+ (dynamic_cast<Stats::VectorInfo *>(*i))->enable();
+ }
+ else {
+ (*i)->enable();
+ }
+
+ }
}
}
diff --git a/util/systemc/stats.hh b/util/systemc/stats.hh
index 360cb6293..9dac960ee 100644
--- a/util/systemc/stats.hh
+++ b/util/systemc/stats.hh
@@ -35,6 +35,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Andrew Bardsley
+ * Matthias Jung
+ * Abdul Mutaal Ahmad
*/
/**
@@ -48,6 +50,8 @@
#ifndef __UTIL_CXX_CONFIG_STATS_H__
#define __UTIL_CXX_CONFIG_STATS_H__
+extern std::string filename;
+
namespace CxxConfig
{
diff --git a/util/tlm/README b/util/tlm/README
index 126705296..fc620f145 100644
--- a/util/tlm/README
+++ b/util/tlm/README
@@ -94,3 +94,26 @@ The parameter -o specifies the begining of the memory region (0x80000000).
The system should boot now.
For conveniance a run_gem5.sh file holds all those commands
+
+
+III. Elastic Trace Setup
+========================
+
+Elastic traces can also be replayed into the SystemC world.
+For more information on elastic traces please refer to:
+
+ - http://www.gem5.org/TraceCPU
+
+ - Exploring System Performance using Elastic Traces:
+ Fast, Accurate and Portable
+ R. Jagtap, S. Diestelhorst, A. Hansson, M. Jung, N. Wehn.
+ IEEE International Conference on Embedded Computer Systems Architectures
+ Modeling and Simulation (SAMOS), July, 2016, Samos Island, Greece.
+
+Similar to I., the simulation can be set up with this command:
+
+> ../../build/ARM/gem5.opt ./tlm_elastic.py
+
+Then:
+
+> ./gem5.opt.sc m5out/config.ini
diff --git a/util/tlm/main.cc b/util/tlm/main.cc
index c06565603..bf442e02b 100644
--- a/util/tlm/main.cc
+++ b/util/tlm/main.cc
@@ -30,6 +30,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Matthias Jung
+ * Abdul Mutaal Ahmad
*/
/**
@@ -67,6 +68,9 @@
#include "sim/system.hh"
#include "stats.hh"
+// Defining global string variable declared in stats.hh
+std::string filename;
+
void usage(const std::string &prog_name)
{
std::cerr << "Usage: " << prog_name << (
@@ -296,6 +300,8 @@ sc_main(int argc, char **argv)
SimControl sim_control("gem5", argc, argv);
Target *memory;
+ filename = "m5out/stats-tlm.txt";
+
tlm::tlm_initiator_socket <> *mem_port =
dynamic_cast<tlm::tlm_initiator_socket<> *>(
sc_core::sc_find_object("gem5.memory")
@@ -319,5 +325,7 @@ sc_main(int argc, char **argv)
SC_REPORT_INFO("sc_main", "End of Simulation");
+ CxxConfig::statsDump();
+
return EXIT_SUCCESS;
}
diff --git a/util/tlm/tlm_elastic.py b/util/tlm/tlm_elastic.py
new file mode 100644
index 000000000..3de0670c0
--- /dev/null
+++ b/util/tlm/tlm_elastic.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2016, University of Kaiserslautern
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER
+# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Matthias Jung
+
+import m5
+import optparse
+
+from m5.objects import *
+from m5.util import addToPath, fatal
+
+addToPath('../../configs/common/')
+
+from Caches import *
+
+# This configuration shows a simple setup of an Elastic Trace Player (eTraceCPU)
+# and an external TLM port for SystemC co-simulation.
+#
+# We assume a DRAM size of 512MB and L1 cache sizes of 32KB.
+#
+# Base System Architecture:
+#
+# +-----------+ ^
+# +-------------+ | eTraceCPU | |
+# | System Port | +-----+-----+ |
+# +------+------+ | $D1 | $I1 | |
+# | +--+--+--+--+ |
+# | | | | gem5 World
+# | | | | (see this file)
+# | | | |
+# +------v------------v-----v--+ |
+# | Membus | v
+# +----------------+-----------+ External Port (see sc_port.*)
+# | ^
+# +---v---+ | TLM World
+# | TLM | | (see sc_target.*)
+# +-------+ v
+#
+#
+# Create a system with a Crossbar and an Elastic Trace Player as CPU:
+
+# Setup System:
+system = System(cpu=TraceCPU(cpu_id=0),
+ mem_mode='timing',
+ mem_ranges = [AddrRange('512MB')],
+ cache_line_size = 64)
+
+# Create a top-level voltage domain:
+system.voltage_domain = VoltageDomain()
+
+# Create a source clock for the system. This is used as the clock period for
+# xbar and memory:
+system.clk_domain = SrcClockDomain(clock = '1GHz',
+ voltage_domain = system.voltage_domain)
+
+# Create a CPU voltage domain:
+system.cpu_voltage_domain = VoltageDomain()
+
+# Create a separate clock domain for the CPUs. In case of Trace CPUs this clock
+# is actually used only by the caches connected to the CPU:
+system.cpu_clk_domain = SrcClockDomain(clock = '1GHz',
+ voltage_domain = system.cpu_voltage_domain)
+
+# Setup CPU and its L1 caches:
+system.cpu.createInterruptController()
+system.cpu.icache = L1_ICache(size="32kB")
+system.cpu.dcache = L1_DCache(size="32kB")
+system.cpu.icache.cpu_side = system.cpu.icache_port
+system.cpu.dcache.cpu_side = system.cpu.dcache_port
+
+# Assign input trace files to the eTraceCPU:
+system.cpu.instTraceFile="system.cpu.traceListener.inst.gz"
+system.cpu.dataTraceFile="system.cpu.traceListener.data.gz"
+
+# Setting up L1 BUS:
+system.membus = IOXBar(width = 16)
+system.physmem = SimpleMemory() # This must be instantiated, even if not needed
+
+# Create a external TLM port:
+system.tlm = ExternalSlave()
+system.tlm.addr_ranges = [AddrRange('512MB')]
+system.tlm.port_type = "tlm"
+system.tlm.port_data = "memory"
+
+# Connect everything:
+system.membus = SystemXBar()
+system.system_port = system.membus.slave
+system.cpu.icache.mem_side = system.membus.slave
+system.cpu.dcache.mem_side = system.membus.slave
+system.membus.master = system.tlm.port
+
+# Start the simulation:
+root = Root(full_system = False, system = system)
+root.system.mem_mode = 'timing'
+m5.instantiate()
+m5.simulate() #Simulation time specified later on commandline