33 files changed, 1296 insertions, 771 deletions
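The three serial-link address layouts introduced by this change ("same", "distributed", "mixed", described in the HMC.py header below) are driven by the new configs/example/hmctest.py. As a rough standalone sketch of what that script sets up — plain (start, end) tuples stand in for gem5's AddrRange, and the 4 GB / 16-vault sizing follows the constants in the script:

# Sketch only: mirrors the address-range setup in configs/example/hmctest.py,
# with plain (start, end) tuples instead of gem5 AddrRange objects so it runs
# without gem5.
oneGB = 1024 * 1024 * 1024
oneMB = 1024 * 1024
num_vaults = 16
num_serial_links = 4

# 16 vault controllers, 256 MB each -> 4 GB HMC device in total
vault_ranges = [(i * 256 * oneMB, (i + 1) * 256 * oneMB - 1)
                for i in range(num_vaults)]

def link_ranges(arch):
    if arch == "same":
        # every serial link sees the full 4 GB device
        return [(0, 4 * oneGB - 1)] * num_serial_links
    if arch == "distributed":
        # each link owns a disjoint 1 GB quarter (its local vaults only)
        return [(i * oneGB, (i + 1) * oneGB - 1)
                for i in range(num_serial_links)]
    if arch == "mixed":
        # links 0/1 own 1 GB each, links 2/3 can reach the whole device
        return [(0, oneGB - 1), (oneGB, 2 * oneGB - 1),
                (0, 4 * oneGB - 1), (0, 4 * oneGB - 1)]
    raise ValueError("unknown arch: %s" % arch)

print("vaults: %d x 256 MB" % len(vault_ranges))
for arch in ("same", "distributed", "mixed"):
    print("%s: %s" % (arch, link_ranges(arch)))

In the actual script these ranges are passed to MemConfig.config_mem() through options.ser_ranges and system.mem_ranges, which is where the host/device split in the diff below picks them up.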
diff --git a/configs/common/HMC.py b/configs/common/HMC.py index 130729f88..fcff94cc7 100644 --- a/configs/common/HMC.py +++ b/configs/common/HMC.py @@ -37,6 +37,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Authors: Erfan Azarkhish +# Abdul Mutaal Ahmad # A Simplified model of a complete HMC device. Based on: # [1] http://www.hybridmemorycube.org/specification-download/ @@ -48,6 +49,10 @@ # (G. Kim et. al) # [5] Near Data Processing, Are we there yet? (M. Gokhale) # http://www.cs.utah.edu/wondp/gokhale.pdf +# [6] openHMC - A Configurable Open-Source Hybrid Memory Cube Controller +# (J. Schmidt) +# [7] Hybrid Memory Cube performance characterization on data-centric +# workloads (M. Gokhale) # # This script builds a complete HMC device composed of vault controllers, # serial links, the main internal crossbar, and an external hmc controller. @@ -60,23 +65,62 @@ # This component is simply an instance of the NoncoherentXBar class, and its # parameters are tuned to [2]. # -# - SERIAL LINKS: +# - SERIAL LINKS CONTROLLER: # SerialLink is a simple variation of the Bridge class, with the ability to -# account for the latency of packet serialization. We assume that the -# serializer component at the transmitter side does not need to receive the -# whole packet to start the serialization. But the deserializer waits for -# the complete packet to check its integrity first. +# account for the latency of packet serialization and controller latency. We +# assume that the serializer component at the transmitter side does not need +# to receive the whole packet to start the serialization. But the +# deserializer waits for the complete packet to check its integrity first. +# # * Bandwidth of the serial links is not modeled in the SerialLink component -# itself. Instead bandwidth/port of the HMCController has been adjusted to -# reflect the bandwidth delivered by 1 serial link. +# itself. +# +# * Latency of serial link controller is composed of SerDes latency + link +# controller # -# - HMC CONTROLLER: -# Contains a large buffer (modeled with Bridge) to hide the access latency -# of the memory cube. Plus it simply forwards the packets to the serial -# links in a round-robin fashion to balance load among them. # * It is inferred from the standard [1] and the literature [3] that serial # links share the same address range and packets can travel over any of # them so a load distribution mechanism is required among them. +# +# ----------------------------------------- +# | Host/HMC Controller | +# | ---------------------- | +# | | Link Aggregator | opt | +# | ---------------------- | +# | ---------------------- | +# | | Serial Link + Ser | * 4 | +# | ---------------------- | +# |--------------------------------------- +# ----------------------------------------- +# | Device +# | ---------------------- | +# | | Xbar | * 4 | +# | ---------------------- | +# | ---------------------- | +# | | Vault Controller | * 16 | +# | ---------------------- | +# | ---------------------- | +# | | Memory | | +# | ---------------------- | +# |---------------------------------------| +# +# In this version we have present 3 different HMC archiecture along with +# alongwith their corresponding test script. +# +# same: It has 4 crossbars in HMC memory. All the crossbars are connected +# to each other, providing complete memory range. This archicture also covers +# the added latency for sending a request to non-local vault(bridge in b/t +# crossbars). All the 4 serial links can access complete memory. 
So each +# link can be connected to separate processor. +# +# distributed: It has 4 crossbars inside the HMC. Crossbars are not +# connected.Through each crossbar only local vaults can be accessed. But to +# support this architecture we need a crossbar between serial links and +# processor. +# +# mixed: This is a hybrid architecture. It has 4 crossbars inside the HMC. +# 2 Crossbars are connected to only local vaults. From other 2 crossbar, a +# request can be forwarded to any other vault. import optparse @@ -107,131 +151,277 @@ class HMCSystem(SubSystem): # FIFOs at the input and output of the inteconnect) xbar_response_latency = Param.Cycles(2, "Response latency of the XBar") - #*****************************SERIAL LINK PARAMETERS********************** - # Number of serial links [1] - num_serial_links = Param.Unsigned(4, "Number of serial links") + # number of cross which connects 16 Vaults to serial link[7] + number_mem_crossbar = Param.Unsigned(4, "Number of crossbar in HMC" + ) + + #*****************************SERIAL LINK PARAMETERS*********************** + # Number of serial links controllers [1] + num_links_controllers = Param.Unsigned(4, "Number of serial links") # Number of packets (not flits) to store at the request side of the serial # link. This number should be adjusted to achive required bandwidth - link_buffer_size_req = Param.Unsigned(16, "Number of packets to buffer " + link_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer " "at the request side of the serial link") # Number of packets (not flits) to store at the response side of the serial # link. This number should be adjusted to achive required bandwidth - link_buffer_size_rsp = Param.Unsigned(16, "Number of packets to buffer " + link_buffer_size_rsp = Param.Unsigned(10, "Number of packets to buffer " "at the response side of the serial link") # Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus # the PCB trace latency (3ns Estimated based on [5]) link_latency = Param.Latency('4.6ns', "Latency of the serial links") - # Header overhead of the serial links: Header size is 128bits in HMC [1], - # and we have 16 lanes, so the overhead is 8 cycles - link_overhead = Param.Cycles(8, "The number of cycles required to" - " transmit the packet header over the serial link") - - # Clock frequency of the serial links [1] + # Clock frequency of the each serial link(SerDes) [1] link_frequency = Param.Frequency('10GHz', "Clock Frequency of the serial" "links") - # Number of parallel lanes in each serial link [1] - num_lanes_per_link = Param.Unsigned(16, "Number of lanes per each link") + # Clock frequency of serial link Controller[6] + # clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] / + # data_path_width * 10^6 + # clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz + link_controller_frequency = Param.Frequency('625MHz', + "Clock Frequency of the link controller") - # Number of serial links [1] - num_serial_links = Param.Unsigned(4, "Number of serial links") + # Latency of the serial link controller to process the packets[1][6] + # (ClockDomain = 625 Mhz ) + # used here for calculations only + link_ctrl_latency = Param.Cycles(4, "The number of cycles required for the" + "controller to process the packet") - #*****************************HMC CONTROLLER PARAMETERS******************* - # Number of packets (not flits) to store at the HMC controller. 
This - # number should be high enough to be able to hide the high latency of HMC - ctrl_buffer_size_req = Param.Unsigned(256, "Number of packets to buffer " - "at the HMC controller (request side)") + # total_ctrl_latency = link_ctrl_latency + link_latency + # total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns + total_ctrl_latency = Param.Latency('11ns', "The latency experienced by" + "every packet regardless of size of packet") - # Number of packets (not flits) to store at the response side of the HMC - # controller. - ctrl_buffer_size_rsp = Param.Unsigned(256, "Number of packets to buffer " - "at the HMC controller (response side)") + # Number of parallel lanes in each serial link [1] + num_lanes_per_link = Param.Unsigned( 16, "Number of lanes per each link") - # Latency of the HMC controller to process the packets - # (ClockDomain = Host clock domain) - ctrl_latency = Param.Cycles(4, "The number of cycles required for the " - " controller to process the packet") + # Number of serial links [1] + num_serial_links = Param.Unsigned(4, "Number of serial links") - # Wiring latency from the SoC crossbar to the HMC controller - ctrl_static_latency = Param.Latency('500ps', "Static latency of the HMC" - "controller") + # speed of each lane of serial link - SerDes serial interface 10 Gb/s + serial_link_speed = Param.UInt64(10, "Gbs/s speed of each lane of" + "serial link") - #*****************************PERFORMANCE MONITORING********************** + #*****************************PERFORMANCE MONITORING************************ # The main monitor behind the HMC Controller - enable_global_monitor = Param.Bool(True, "The main monitor behind the " + enable_global_monitor = Param.Bool(False, "The main monitor behind the " "HMC Controller") # The link performance monitors - enable_link_monitor = Param.Bool(True, "The link monitors") + enable_link_monitor = Param.Bool(False, "The link monitors" ) + + # link aggregator enable - put a cross between buffers & links + enable_link_aggr = Param.Bool(False, "The crossbar between port and " + "Link Controller") + + enable_buff_div = Param.Bool(True, "Memory Range of Buffer is" + "divided between total range") + + #*****************************HMC ARCHITECTURE ************************ + # Memory chunk for 16 vault - numbers of vault / number of crossbars + mem_chunk = Param.Unsigned(4, "Chunk of memory range for each cross bar " + "in arch 0") + + # size of req buffer within crossbar, used for modelling extra latency + # when the reuqest go to non-local vault + xbar_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer " + "at the request side of the crossbar") + + # size of response buffer within crossbar, used for modelling extra latency + # when the response received from non-local vault + xbar_buffer_size_resp = Param.Unsigned(10, "Number of packets to buffer " + "at the response side of the crossbar") + +# configure host system with Serial Links +def config_host_hmc(options, system): + + system.hmc_host=HMCSystem() + + try: + system.hmc_host.enable_global_monitor = options.enable_global_monitor + except: + pass; + + try: + system.hmc_host.enable_link_monitor = options.enable_link_monitor + except: + pass; + + # Serial link Controller with 16 SerDes links at 10 Gbps + # with serial link ranges w.r.t to architecture + system.hmc_host.seriallink = [SerialLink(ranges = options.ser_ranges[i], + req_size=system.hmc_host.link_buffer_size_req, + resp_size=system.hmc_host.link_buffer_size_rsp, + num_lanes=system.hmc_host.num_lanes_per_link, + 
link_speed=system.hmc_host.serial_link_speed, + delay=system.hmc_host.total_ctrl_latency) + for i in xrange(system.hmc_host.num_serial_links)] + + # enable global monitor + if system.hmc_host.enable_global_monitor: + system.hmc_host.lmonitor = [ CommMonitor() + for i in xrange(system.hmc_host.num_serial_links)] + + # set the clock frequency for serial link + for i in xrange(system.hmc_host.num_serial_links): + system.hmc_host.seriallink[i].clk_domain = SrcClockDomain(clock=system. + hmc_host.link_controller_frequency, voltage_domain= + VoltageDomain(voltage = '1V')) + + # Connect membus/traffic gen to Serial Link Controller for differrent HMC + # architectures + if options.arch == "distributed": + for i in xrange(system.hmc_host.num_links_controllers): + if system.hmc_host.enable_global_monitor: + system.membus.master = system.hmc_host.lmonitor[i].slave + system.hmc_host.lmonitor[i].master = \ + system.hmc_host.seriallink[i].slave + else: + system.membus.master = system.hmc_host.seriallink[i].slave + if options.arch == "mixed": + if system.hmc_host.enable_global_monitor: + system.membus.master = system.hmc_host.lmonitor[0].slave + system.hmc_host.lmonitor[0].master = \ + system.hmc_host.seriallink[0].slave + + system.membus.master = system.hmc_host.lmonitor[1].slave + system.hmc_host.lmonitor[1].master = \ + system.hmc_host.seriallink[1].slave + + system.tgen[2].port = system.hmc_host.lmonitor[2].slave + system.hmc_host.lmonitor[2].master = \ + system.hmc_host.seriallink[2].slave + + system.tgen[3].port = system.hmc_host.lmonitor[3].slave + system.hmc_host.lmonitor[3].master = \ + system.hmc_host.seriallink[3].slave + else: + system.membus.master = system.hmc_host.seriallink[0].slave + system.membus.master = system.hmc_host.seriallink[1].slave + system.tgen[2].port = system.hmc_host.seriallink[2].slave + system.tgen[3].port = system.hmc_host.seriallink[3].slave + if options.arch == "same" : + for i in xrange(system.hmc_host.num_links_controllers): + if system.hmc_host.enable_global_monitor: + system.tgen[i].port = system.hmc_host.lmonitor[i].slave + system.hmc_host.lmonitor[i].master = \ + system.hmc_host.seriallink[i].slave + else: + system.tgen[i].port = system.hmc_host.seriallink[i].slave + + return system # Create an HMC device and attach it to the current system -def config_hmc(options, system): +def config_hmc(options, system, hmc_host): - system.hmc = HMCSystem() + # Create HMC device + system.hmc_dev = HMCSystem() - system.buffer = Bridge(ranges=system.mem_ranges, - req_size=system.hmc.ctrl_buffer_size_req, - resp_size=system.hmc.ctrl_buffer_size_rsp, - delay=system.hmc.ctrl_static_latency) + # Global monitor try: - system.hmc.enable_global_monitor = options.enable_global_monitor + system.hmc_dev.enable_global_monitor = options.enable_global_monitor except: pass; try: - system.hmc.enable_link_monitor = options.enable_link_monitor + system.hmc_dev.enable_link_monitor = options.enable_link_monitor except: pass; - system.membus.master = system.buffer.slave - - # The HMC controller (Clock domain is the same as the host) - system.hmccontroller = HMCController(width=(system.hmc.num_lanes_per_link. - value * system.hmc.num_serial_links/8), - frontend_latency=system.hmc.ctrl_latency, - forward_latency=system.hmc.link_overhead, - response_latency=system.hmc.link_overhead) - - system.hmccontroller.clk_domain = SrcClockDomain(clock=system.hmc. 
- link_frequency, voltage_domain = VoltageDomain(voltage = '1V')) - - # Serial Links - system.hmc.seriallink =[ SerialLink(ranges = system.mem_ranges, - req_size=system.hmc.link_buffer_size_req, - resp_size=system.hmc.link_buffer_size_rsp, - num_lanes=system.hmc.num_lanes_per_link, - delay=system.hmc.link_latency) - for i in xrange(system.hmc.num_serial_links)] - - if system.hmc.enable_link_monitor: - system.hmc.lmonitor = [ CommMonitor() - for i in xrange(system.hmc.num_serial_links)] - - # The HMC Crossbar located in its logic-base (LoB) - system.hmc.xbar = NoncoherentXBar(width = system.hmc.xbar_width, - frontend_latency=system.hmc.xbar_frontend_latency, - forward_latency=system.hmc.xbar_forward_latency, - response_latency=system.hmc.xbar_response_latency ) - system.hmc.xbar.clk_domain = SrcClockDomain(clock = - system.hmc.xbar_frequency, voltage_domain = - VoltageDomain(voltage = '1V')) - - if system.hmc.enable_global_monitor: - system.gmonitor = CommMonitor() - system.buffer.master = system.gmonitor.slave - system.gmonitor.master = system.hmccontroller.slave - else: - system.hmccontroller.slave = system.buffer.master - - for i in xrange(system.hmc.num_serial_links): - system.hmccontroller.master = system.hmc.seriallink[i].slave - system.hmc.seriallink[i].clk_domain = system.hmccontroller.clk_domain; - if system.hmc.enable_link_monitor: - system.hmc.seriallink[i].master = system.hmc.lmonitor[i].slave - system.hmc.lmonitor[i].master = system.hmc.xbar.slave + + if system.hmc_dev.enable_link_monitor: + system.hmc_dev.lmonitor = [ CommMonitor() + for i in xrange(system.hmc_dev.num_links_controllers)] + + # 4 HMC Crossbars located in its logic-base (LoB) + system.hmc_dev.xbar = [ NoncoherentXBar(width=system.hmc_dev.xbar_width, + frontend_latency=system.hmc_dev.xbar_frontend_latency, + forward_latency=system.hmc_dev.xbar_forward_latency, + response_latency=system.hmc_dev.xbar_response_latency ) + for i in xrange(system.hmc_host.number_mem_crossbar)] + + for i in xrange(system.hmc_dev.number_mem_crossbar): + system.hmc_dev.xbar[i].clk_domain = SrcClockDomain( + clock=system.hmc_dev.xbar_frequency,voltage_domain= + VoltageDomain(voltage='1V')) + + # Attach 4 serial link to 4 crossbar/s + for i in xrange(system.hmc_dev.num_serial_links): + if system.hmc_dev.enable_link_monitor: + system.hmc_host.seriallink[i].master = \ + system.hmc_dev.lmonitor[i].slave + system.hmc_dev.lmonitor[i].master = system.hmc_dev.xbar[i].slave else: - system.hmc.seriallink[i].master = system.hmc.xbar.slave + system.hmc_host.seriallink[i].master = system.hmc_dev.xbar[i].slave + + # Connecting xbar with each other for request arriving at the wrong xbar, + # then it will be forward to correct xbar. 
Bridge is used to connect xbars + if options.arch == "same": + numx = len(system.hmc_dev.xbar) + + # create a list of buffers + system.hmc_dev.buffers = [ Bridge( + req_size=system.hmc_dev.xbar_buffer_size_req, + resp_size=system.hmc_dev.xbar_buffer_size_resp) + for i in xrange(numx * (system.hmc_dev.mem_chunk - 1))] + + # Buffer iterator + it = iter(range(len(system.hmc_dev.buffers))) + + # necesarry to add system_port to one of the xbar + system.system_port = system.hmc_dev.xbar[3].slave + + # iterate over all the crossbars and connect them as required + for i in range(numx): + for j in range(numx): + # connect xbar to all other xbars except itself + if i != j: + # get the next index of buffer + index = it.next() + + # Change the default values for ranges of bridge + system.hmc_dev.buffers[index].ranges = system.mem_ranges[ + j * int(system.hmc_dev.mem_chunk): + (j + 1) * int(system.hmc_dev.mem_chunk)] + + # Connect the bridge between corssbars + system.hmc_dev.xbar[i].master = system.hmc_dev.buffers[ + index].slave + system.hmc_dev.buffers[ + index].master = system.hmc_dev.xbar[j].slave + else: + # Don't connect the xbar to itself + pass + + # Two crossbars are connected to all other crossbars-Other 2 vault + # can only direct traffic to it local vaults + if options.arch == "mixed": + + system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4]) + system.hmc_dev.xbar[3].master = system.hmc_dev.buffer30.slave + system.hmc_dev.buffer30.master = system.hmc_dev.xbar[0].slave + + system.hmc_dev.buffer31 = Bridge(ranges=system.mem_ranges[4:8]) + system.hmc_dev.xbar[3].master = system.hmc_dev.buffer31.slave + system.hmc_dev.buffer31.master = system.hmc_dev.xbar[1].slave + + system.hmc_dev.buffer32 = Bridge(ranges=system.mem_ranges[8:12]) + system.hmc_dev.xbar[3].master = system.hmc_dev.buffer32.slave + system.hmc_dev.buffer32.master = system.hmc_dev.xbar[2].slave + + + system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4]) + system.hmc_dev.xbar[2].master = system.hmc_dev.buffer20.slave + system.hmc_dev.buffer20.master = system.hmc_dev.xbar[0].slave + + system.hmc_dev.buffer21 = Bridge(ranges=system.mem_ranges[4:8]) + system.hmc_dev.xbar[2].master = system.hmc_dev.buffer21.slave + system.hmc_dev.buffer21.master = system.hmc_dev.xbar[1].slave + + system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16]) + system.hmc_dev.xbar[2].master = system.hmc_dev.buffer23.slave + system.hmc_dev.buffer23.master = system.hmc_dev.xbar[3].slave + diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py index 4685cd5d1..71e3bf460 100644 --- a/configs/common/MemConfig.py +++ b/configs/common/MemConfig.py @@ -153,9 +153,10 @@ def config_mem(options, system): """ if ( options.mem_type == "HMC_2500_x32"): - HMC.config_hmc(options, system) - subsystem = system.hmc - xbar = system.hmc.xbar + HMChost = HMC.config_host_hmc(options, system) + HMC.config_hmc(options, system, HMChost.hmc_host) + subsystem = system.hmc_dev + xbar = system.hmc_dev.xbar else: subsystem = system xbar = system.membus @@ -222,4 +223,7 @@ def config_mem(options, system): # Connect the controllers to the membus for i in xrange(len(subsystem.mem_ctrls)): - subsystem.mem_ctrls[i].port = xbar.master + if (options.mem_type == "HMC_2500_x32"): + subsystem.mem_ctrls[i].port = xbar[i/4].master + else: + subsystem.mem_ctrls[i].port = xbar.master diff --git a/configs/example/hmctest.py b/configs/example/hmctest.py new file mode 100644 index 000000000..bd6ca24d1 --- /dev/null +++ b/configs/example/hmctest.py @@ -0,0 +1,170 
@@ +import optparse +import sys +import subprocess + +import m5 +from m5.objects import * +from m5.util import addToPath + +addToPath('../common') +import MemConfig +import HMC + +parser = optparse.OptionParser() + +# Use a HMC_2500_x32 by default +parser.add_option("--mem-type", type = "choice", default = "HMC_2500_x32", + choices = MemConfig.mem_names(), + help = "type of memory to use") + +parser.add_option("--ranks", "-r", type = "int", default = 1, + help = "Number of ranks to iterate across") + +parser.add_option("--rd_perc", type ="int", default=100, + help = "Percentage of read commands") + +parser.add_option("--mode", type ="choice", default ="DRAM", + choices = ["DRAM", "DRAM_ROTATE", "RANDOM"], + help = "DRAM: Random traffic; \ + DRAM_ROTATE: Traffic rotating across banks and ranks" + ) + +parser.add_option("--addr_map", type ="int", default = 1, + help = "0: RoCoRaBaCh; 1: RoRaBaCoCh/RoRaBaChCo") + +parser.add_option("--arch", type = "choice", default = "distributed", + choices = ["same", "distributed", "mixed"], + help = "same: HMC-4 links with same range\ + distributed: HMC-4 links with distributed range\ + mixed: mixed with same & distributed range") + +parser.add_option("--linkaggr", type = "int", default = 0, + help = "1: enable link crossbar, 0: disable link crossbar") + +parser.add_option("--num_cross", type = "int", default = 4, + help = "1: number of crossbar in HMC=1;\ + 4: number of crossbar = 4") + +parser.add_option("--tlm-memory", type = "string", + help="use external port for SystemC TLM cosimulation") + +parser.add_option("--elastic-trace-en", action ="store_true", + help = """Enable capture of data dependency and instruction + fetch traces using elastic trace probe.""") + +(options, args) = parser.parse_args() + +if args: + print "Error: script doesn't take any positional arguments" + sys.exit(1) + +system = System() +system.clk_domain = SrcClockDomain(clock='100GHz', + voltage_domain= + VoltageDomain(voltage = '1V')) +# Create additional crossbar for arch1 +if options.arch == "distributed" or options.arch == "mixed" : + system.membus = NoncoherentXBar( width=8 ) + system.membus.badaddr_responder = BadAddr() + system.membus.default = Self.badaddr_responder.pio + system.membus.width = 8 + system.membus.frontend_latency = 3 + system.membus.forward_latency = 4 + system.membus.response_latency = 2 + + system.membus.clk_domain = SrcClockDomain(clock='100GHz', voltage_domain= + VoltageDomain(voltage = '1V')) + +# we are considering 4GB HMC device with following parameters +# hmc_device_size = '4GB' +# hmc_num_vaults = 16 +# hmc_vault_size = '256MB' +# hmc_stack_size = 8 +# hmc_bank_in_stack = 2 +# hmc_bank_size = '16MB' +# hmc_bank_in_vault = 16 + +# determine the burst length in bytes +burst_size = 256 +num_serial_links = 4 +num_vault_ctrl = 16 +options.mem_channels = 1 +options.external_memory_system = 0 +options.mem_ranks=1 +stride_size = burst_size +system.cache_line_size = burst_size + +# Enable performance monitoring +options.enable_global_monitor = True +options.enable_link_monitor = False + +# Bytes used for calculations +oneGBytes = 1024 * 1024 * 1024 +oneMBytes = 1024 * 1024 + +# Memory ranges of 16 vault controller - Total_HMC_size / 16 +mem_range_vault = [ AddrRange(i * 256 * oneMBytes, ((i + 1) * 256 * oneMBytes) + - 1) + for i in range(num_vault_ctrl)] + +# Memmory ranges of serial link for arch-0 +# Same as the ranges of vault controllers - 4 vault - to - 1 serial link +if options.arch == "same": + ser_range = [ AddrRange(0, (4 * oneGBytes) - 1) + for 
i in range(num_serial_links)] + options.ser_ranges = ser_range + +# Memmory ranges of serial link for arch-1 +# Distributed range accross links +if options.arch == "distributed": + ser_range = [ AddrRange(i * oneGBytes, ((i + 1) * oneGBytes) - 1) + for i in range(num_serial_links)] + options.ser_ranges = ser_range + +# Memmory ranges of serial link for arch-2 +# "Mixed" address distribution over links +if options.arch == "mixed": + ser_range0 = AddrRange(0 , (1 * oneGBytes) - 1) + ser_range1 = AddrRange(1 * oneGBytes , (2 * oneGBytes) - 1) + ser_range2 = AddrRange(0 , (4 * oneGBytes) - 1) + ser_range3 = AddrRange(0 , (4 * oneGBytes) - 1) + options.ser_ranges = [ser_range0, ser_range1, ser_range2, ser_range3] + +# Assign ranges of vault controller to system ranges +system.mem_ranges = mem_range_vault + +# open traffic generator +cfg_file_name = "./tests/quick/se/70.tgen/traffic.cfg" +cfg_file = open(cfg_file_name, 'r') + +# number of traffic generator +np = 4 +# create a traffic generator, and point it to the file we just created +system.tgen = [ TrafficGen(config_file = cfg_file_name) for i in xrange(np)] + +# Config memory system with given HMC arch +MemConfig.config_mem(options, system) + +if options.arch == "distributed": + for i in xrange(np): + system.tgen[i].port = system.membus.slave + # connect the system port even if it is not used in this example + system.system_port = system.membus.slave + +if options.arch == "mixed": + for i in xrange(int(np/2)): + system.tgen[i].port = system.membus.slave + # connect the system port even if it is not used in this example + system.system_port = system.membus.slave + + +# run Forrest, run! +root = Root(full_system = False, system = system) +root.system.mem_mode = 'timing' + +m5.instantiate() +m5.simulate(10000000000) + +m5.stats.dump() + +print "Done!" diff --git a/ext/drampower/README.md b/ext/drampower/README.md index a43298b01..5d6eb6e82 100644 --- a/ext/drampower/README.md +++ b/ext/drampower/README.md @@ -252,8 +252,8 @@ The tool is based on the DRAM power model developed jointly by the Computer Engi **To cite the DRAMPower Tool:** ``` -[1] "DRAMPower: Open-source DRAM power & energy estimation tool" -Karthik Chandrasekar, Christian Weis, Yonghui Li, Benny Akesson, Norbert Wehn, and Kees Goossens +[1] DRAMPower: Open-source DRAM Power & Energy Estimation Tool +Karthik Chandrasekar, Christian Weis, Yonghui Li, Sven Goossens, Matthias Jung, Omar Naji, Benny Akesson, Norbert Wehn, and Kees Goossens URL: http://www.drampower.info ``` diff --git a/ext/drampower/src/CmdScheduler.cc b/ext/drampower/src/CmdScheduler.cc index bffc5d3bb..a4619b94e 100644 --- a/ext/drampower/src/CmdScheduler.cc +++ b/ext/drampower/src/CmdScheduler.cc @@ -31,7 +31,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Karthik Chandrasekar + * Authors: Karthik Chandrasekar, Yonghui Li, Sven Goossens * */ #include "CmdScheduler.h" @@ -42,17 +42,20 @@ #include <algorithm> // For max +#define MILLION 1000000 + + using namespace std; using namespace Data; // Read the traces and get the transaction. Each transaction is executed by // scheduling a number of commands to the memory. Hence, the transactions are // translated into a sequence of commands which will be used for power analysis. 
-void cmdScheduler::transTranslation(MemorySpecification memSpec, +void cmdScheduler::transTranslation(const MemorySpecification& memSpec, ifstream& trans_trace, int grouping, int interleaving, int burst, int powerdown) { commands.open("commands.trace", ifstream::out); - MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; + const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; nBanks = memArchSpec.nbrOfBanks; nColumns = memArchSpec.nbrOfColumns; burstLength = memArchSpec.burstLength; @@ -77,13 +80,14 @@ void cmdScheduler::transTranslation(MemorySpecification memSpec, } // cmdScheduler::transTranslation // initialize the variables and vectors for starting command scheduling. -void cmdScheduler::schedulingInitialization(MemorySpecification memSpec) +void cmdScheduler::schedulingInitialization(const MemorySpecification& memSpec) { - MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; + const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; - ACT.resize(2 * memSpec.memArchSpec.nbrOfBanks); - RDWR.resize(2 * memSpec.memArchSpec.nbrOfBanks); - PRE.resize(memSpec.memArchSpec.nbrOfBanks); + const size_t numBanks = static_cast<size_t>(memSpec.memArchSpec.nbrOfBanks); + ACT.resize(2 * numBanks); + RDWR.resize(2 * numBanks); + PRE.resize(numBanks); bankaccess = memSpec.memArchSpec.nbrOfBanks; if (!ACT.empty()) { ACT.erase(ACT.begin(), ACT.end()); @@ -96,14 +100,15 @@ void cmdScheduler::schedulingInitialization(MemorySpecification memSpec) } ///////////////initialization////////////// - for (unsigned i = 0; i < memSpec.memArchSpec.nbrOfBanks; i++) { + for (int64_t i = 0; i < memSpec.memArchSpec.nbrOfBanks; i++) { cmd.Type = PRECHARGE; - cmd.bank = i; + cmd.bank = static_cast<unsigned>(i); cmd.name = "PRE"; - if (memSpec.id == "WIDEIO_SDR") - cmd.time = 1 - static_cast<double>(memSpec.memTimingSpec.TAW); - else - cmd.time = 1 - static_cast<double>(memSpec.memTimingSpec.FAW); + if (memSpec.id == "WIDEIO_SDR") { + cmd.time = 1 - memSpec.memTimingSpec.TAW; + } else { + cmd.time = 1 - memSpec.memTimingSpec.FAW; + } PRE.push_back(cmd); @@ -114,7 +119,7 @@ void cmdScheduler::schedulingInitialization(MemorySpecification memSpec) cmd.Type = WRITE; cmd.name = "WRITE"; cmd.time = -1; - RDWR[i].push_back(cmd); + RDWR[static_cast<size_t>(i)].push_back(cmd); } tREF = memTimingSpec.REFI; transFinish.time = 0; @@ -130,14 +135,14 @@ void cmdScheduler::schedulingInitialization(MemorySpecification memSpec) // transactions are generated according to the information read from the traces. // Then the command scheduling function is triggered to generate commands and // schedule them to the memory according to the timing constraints. 
-void cmdScheduler::getTrans(std::ifstream& trans_trace, MemorySpecification memSpec) +void cmdScheduler::getTrans(std::ifstream& trans_trace, const MemorySpecification& memSpec) { std::string line; transTime = 0; - unsigned newtranstime; - unsigned transAddr; - unsigned transType = 1; + uint64_t newtranstime; + uint64_t transAddr; + int64_t transType = 1; trans TransItem; if (!transTrace.empty()) { @@ -147,12 +152,12 @@ void cmdScheduler::getTrans(std::ifstream& trans_trace, MemorySpecification memS while (getline(trans_trace, line)) { istringstream linestream(line); string item; - unsigned itemnum = 0; + uint64_t itemnum = 0; while (getline(linestream, item, ',')) { if (itemnum == 0) { stringstream timestamp(item); timestamp >> newtranstime; - transTime = transTime + newtranstime; + transTime = transTime + static_cast<int64_t>(newtranstime); } else if (itemnum == 1) { if (item == "write" || item == "WRITE") { transType = WRITE; @@ -191,33 +196,35 @@ void cmdScheduler::getTrans(std::ifstream& trans_trace, MemorySpecification memS // be scheduled until all the commands for the current one are scheduled. // After the scheduling, a sequence of commands are obtained and they are written // into commands.txt which will be used for power analysis. -void cmdScheduler::analyticalScheduling(MemorySpecification memSpec) +void cmdScheduler::analyticalScheduling(const MemorySpecification& memSpec) { - int Bs = -1; - int transType = -1; - double timer = 0; - int bankGroupPointer = 0; - int bankGroupAddr = 0; + int64_t transType = -1; + int64_t timer = 0; + uint64_t bankGroupPointer = 0; + uint64_t bankGroupAddr = 0; bool collisionFound; physicalAddr PhysicalAddress; bool bankGroupSwitch = false; - std::vector<unsigned> bankPointer(nbrOfBankGroups, 0); - std::vector<int> bankAccessNum(nBanks, -1); - std::vector<bool> ACTSchedule(nBanks, false); - int bankAddr = -1; - double endTime = 0; - double tComing_REF = 0; + std::vector<uint64_t> bankPointer(static_cast<size_t>(nbrOfBankGroups), 0); + std::vector<int64_t> bankAccessNum(static_cast<size_t>(nBanks), -1); + std::vector<bool> ACTSchedule(static_cast<size_t>(nBanks), false); + uint64_t bankAddr = 0; + int64_t endTime = 0; + int64_t tComing_REF = 0; Inselfrefresh = 0; - MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; + const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; - for (unsigned t = 0; t < transTrace.size(); t++) { + for (uint64_t t = 0; t < transTrace.size(); t++) { cmdScheduling.erase(cmdScheduling.begin(), cmdScheduling.end()); - for (unsigned i = 0; i < nBanks; i++) { - ACTSchedule[i] = false; - bankAccessNum[i] = -1; + for (auto a : ACTSchedule) { + a = false; + } + + for (auto& b : bankAccessNum) { + b = -1; } timingsGet = false; @@ -225,13 +232,13 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec) PhysicalAddress = memoryMap(transTrace[t], memSpec); - for (unsigned i = 0; i < nbrOfBankGroups; i++) { - bankPointer[i] = PhysicalAddress.bankAddr; // the bank pointer per group. + for (auto& b : bankPointer) { + b = PhysicalAddress.bankAddr; // the bank pointer per group. } bankGroupPointer = PhysicalAddress.bankGroupAddr; - endTime = max(transFinish.time, PRE[transFinish.bank].time + - static_cast<int>(memTimingSpec.RP)); + endTime = max(transFinish.time, PRE[static_cast<size_t>(transFinish.bank)].time + + static_cast<int>(memTimingSpec.RP)); // Before starting the scheduling for the next transaction, it has to // check whether it is necessary for implementing power down. 
@@ -244,14 +251,12 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec) ///////////////Scheduling Refresh//////////////////////// if (((transFinish.time >= tREF) || (timer >= tREF))) { - for (double i = 0; i <= ((timer - tComing_REF) > 0 ? (timer - tComing_REF) / + for (int64_t i = 0; i <= ((timer - tComing_REF) > 0 ? (timer - tComing_REF) / memTimingSpec.REFI : 0); i++) { cmd.bank = 0; cmd.name = "REF"; - cmd.time = max(max(max(transFinish.time, PRE[transFinish.bank].time - + static_cast<int>(memTimingSpec.RP)), tREF), startTime); - if (((power_down == SELF_REFRESH) && !Inselfrefresh) || - (power_down != SELF_REFRESH)) { + cmd.time = max(max(max(transFinish.time, PRE[static_cast<size_t>(transFinish.bank)].time + memTimingSpec.RP), tREF), startTime); + if ((power_down == SELF_REFRESH && !Inselfrefresh) || power_down != SELF_REFRESH) { cmdScheduling.push_back(cmd); startTime = cmd.time + memTimingSpec.RFC; } @@ -262,7 +267,7 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec) } } ///////////////Execution Transactions/////////////////// - Bs = PhysicalAddress.bankAddr; + uint64_t Bs = PhysicalAddress.bankAddr; transType = transTrace[t].type; tRWTP = getRWTP(transType, memSpec); @@ -280,9 +285,8 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec) bankGroupSwitch = true; } // update to the current bank group address. - bankGroupAddr = PhysicalAddress.bankGroupAddr + j; - bankAddr = bankGroupAddr * nBanks / nbrOfBankGroups + - bankPointer[bankGroupAddr]; + bankGroupAddr = PhysicalAddress.bankGroupAddr + static_cast<uint64_t>(j); + bankAddr = bankGroupAddr * static_cast<uint64_t>(nBanks) / nbrOfBankGroups + bankPointer[bankGroupAddr]; } else { bankAddr = Bs + i; } @@ -312,7 +316,7 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec) static_cast<int>(memTimingSpec.TAW)); } - if ((i == 0) && (j == 0)) { + if (i == 0 && j == 0) { cmd.time = max(cmd.time, PreRDWR.time + 1); cmd.time = max(cmd.time, timer); cmd.time = max(startTime, cmd.time); @@ -358,7 +362,7 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec) } for (int ACTBank = static_cast<int>(ACT.size() - 1); ACTBank >= 0; ACTBank--) { - if (ACT[ACTBank].bank == bankAddr) { + if (ACT[ACTBank].bank == static_cast<int64_t>(bankAddr)) { cmd.time = max(PreRDWR.time + tSwitch_init, ACT.back().time + static_cast<int>(memTimingSpec.RCD)); break; @@ -392,7 +396,7 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec) PRE[bankAddr].name = "PRE"; for (int ACTBank = static_cast<int>(ACT.size() - 1); ACTBank >= 0; ACTBank--) { - if (ACT[ACTBank].bank == bankAddr) { + if (ACT[ACTBank].bank == static_cast<int64_t>(bankAddr)) { PRE[bankAddr].time = max(ACT.back().time + static_cast<int>(memTimingSpec.RAS), PreRDWR.time + tRWTP); @@ -419,7 +423,7 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec) /////////////Update Vector Length///////////////// // the vector length is reduced so that less memory is used for running // this tool. - if (ACT.size() >= memSpec.memArchSpec.nbrOfBanks) { + if (ACT.size() >= static_cast<size_t>(memSpec.memArchSpec.nbrOfBanks)) { for (int m = 0; m < BI * BGI; m++) { ACT.erase(ACT.begin()); RDWR[0].erase(RDWR[0].begin(), RDWR[0].end()); @@ -443,14 +447,14 @@ void cmdScheduler::analyticalScheduling(MemorySpecification memSpec) // to add the power down/up during the command scheduling for transactions. 
// It is called when the command scheduling for a transaction is finished, and it // is also called if there is a refresh. -void cmdScheduler::pdScheduling(double endTime, double timer, - MemorySpecification memSpec) +void cmdScheduler::pdScheduling(int64_t endTime, int64_t timer, + const MemorySpecification& memSpec) { - double ZERO = 0; - MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; + int64_t ZERO = 0; + const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; endTime = max(endTime, startTime); - double pdTime = max(ZERO, timer - endTime); + int64_t pdTime = max(ZERO, timer - endTime); if ((timer > (endTime + memTimingSpec.CKE)) && (power_down == POWER_DOWN)) { cmd.bank = 0; @@ -490,11 +494,11 @@ void cmdScheduler::pdScheduling(double endTime, double timer, // get the time when a precharge occurs after a read/write command is scheduled. // In addition, it copes with different kind of memories. -int cmdScheduler::getRWTP(int transType, MemorySpecification memSpec) +int64_t cmdScheduler::getRWTP(int64_t transType, const MemorySpecification& memSpec) { - int tRWTP_init = 0; - MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; - MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; + int64_t tRWTP_init = 0; + const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; + const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; if (transType == READ) { switch (memSpec.memoryType) { @@ -506,13 +510,13 @@ int cmdScheduler::getRWTP(int transType, MemorySpecification memSpec) case MemoryType::LPDDR2: case MemoryType::LPDDR3: tRWTP_init = memArchSpec.burstLength / memArchSpec.dataRate + - max(0, static_cast<int>(memTimingSpec.RTP - 2)); + max(int64_t(0), memTimingSpec.RTP - 2); break; case MemoryType::DDR2: tRWTP_init = memTimingSpec.AL + memArchSpec.burstLength / memArchSpec.dataRate + - max(static_cast<int>(memTimingSpec.RTP), 2) - 2; + max(memTimingSpec.RTP, int64_t(2)) - 2; break; case MemoryType::DDR3: @@ -525,10 +529,10 @@ int cmdScheduler::getRWTP(int transType, MemorySpecification memSpec) } else if (transType == WRITE) { if (memSpec.memoryType == MemoryType::WIDEIO_SDR) { tRWTP_init = memTimingSpec.WL + memArchSpec.burstLength / - memArchSpec.dataRate - 1 + memSpec.memTimingSpec.WR; + memArchSpec.dataRate - 1 + memTimingSpec.WR; } else { tRWTP_init = memTimingSpec.WL + memArchSpec.burstLength / - memArchSpec.dataRate + memSpec.memTimingSpec.WR; + memArchSpec.dataRate + memTimingSpec.WR; } if ((memSpec.memoryType == MemoryType::LPDDR2) || (memSpec.memoryType == MemoryType::LPDDR3)) { @@ -543,11 +547,11 @@ int cmdScheduler::getRWTP(int transType, MemorySpecification memSpec) // In particular, tSwitch_init is generally used to provide the timings for // scheduling a read/write command after a read/write command which have been // scheduled to any possible banks within any possible bank groups (DDR4). 
-void cmdScheduler::getTimingConstraints(bool BGSwitch, MemorySpecification memSpec, - int PreType, int CurrentType) +void cmdScheduler::getTimingConstraints(bool BGSwitch, const MemorySpecification& memSpec, + int64_t PreType, int64_t CurrentType) { - MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; - MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; + const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; + const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; if (memSpec.memoryType != MemoryType::DDR4) { tRRD_init = memTimingSpec.RRD; @@ -586,7 +590,7 @@ void cmdScheduler::getTimingConstraints(bool BGSwitch, MemorySpecification memSp if (PreType == CurrentType) { tSwitch_init = tCCD_init; timingsGet = true; - } else if ((PreType == WRITE) && (CurrentType == READ)) { + } else if (PreType == WRITE && CurrentType == READ) { tSwitch_init = memTimingSpec.WL + memArchSpec.burstLength / memArchSpec.dataRate + tWTR_init; } @@ -601,59 +605,55 @@ void cmdScheduler::getTimingConstraints(bool BGSwitch, MemorySpecification memSp // The logical address of each transaction is translated into a physical address // which consists of bank group (for DDR4), bank, row and column addresses. cmdScheduler::physicalAddr cmdScheduler::memoryMap(trans Trans, - MemorySpecification memSpec) + const MemorySpecification& memSpec) { - int DecLogic; + int64_t DecLogic; physicalAddr PhysicalAddr; DecLogic = Trans.logicalAddress; // row-bank-column-BI-BC-BGI-BL - if ((BGI > 1) && (memSpec.memoryType == MemoryType::DDR4)) { - unsigned colBits = static_cast<unsigned>(log2(nColumns)); - unsigned bankShift = static_cast<unsigned>(colBits + ((BI > 1) ? log2(BI) : 0) - + ((BGI > 1) ? log2(BGI) : 0)); - unsigned bankMask = static_cast<unsigned>(nBanks / (BI * nbrOfBankGroups) - 1) - << bankShift; - unsigned bankAddr = (DecLogic & bankMask) >> - static_cast<unsigned>(colBits + ((BGI > 1) ? log2(BGI) : 0)); + if (BGI > 1 && memSpec.memoryType == MemoryType::DDR4) { + uint64_t colBits = uintLog2(nColumns); + uint64_t bankShift = colBits + ((BI > 1) ? uintLog2(BI) : 0) + ((BGI > 1) ? uintLog2(BGI) : 0); + uint64_t bankMask = (nBanks / (BI * nbrOfBankGroups) - 1) << bankShift; + uint64_t bankAddr = (DecLogic & bankMask) >> (colBits + ((BGI > 1) ? uintLog2(BGI) : 0)); PhysicalAddr.bankAddr = bankAddr; - unsigned bankGroupShift = static_cast<unsigned>(log2(burstLength)); - unsigned bankGroupMask = (nbrOfBankGroups / BGI - 1) << bankGroupShift; - unsigned bankGroupAddr = (DecLogic & bankGroupMask) >> bankGroupShift; + uint64_t bankGroupShift = uintLog2(burstLength); + uint64_t bankGroupMask = (nbrOfBankGroups / BGI - 1) << bankGroupShift; + uint64_t bankGroupAddr = (DecLogic & bankGroupMask) >> bankGroupShift; PhysicalAddr.bankGroupAddr = bankGroupAddr; - unsigned colShift = static_cast<unsigned>(log2(BC * burstLength) + - ((BI > 1) ? log2(BI) : 0) + ((BGI > 1) ? log2(BGI) : 0)); - unsigned colMask = static_cast<unsigned>(nColumns / (BC * burstLength) - 1) - << colShift; - unsigned colAddr = (DecLogic & colMask) >> - static_cast<unsigned>((colShift - log2(static_cast<unsigned>(BC) * burstLength))); + uint64_t colShift = uintLog2(BC * burstLength) + + ((BI > 1) ? uintLog2(BI) : 0) + ((BGI > 1) ? 
uintLog2(BGI) : 0); + uint64_t colMask = (nColumns / (BC * burstLength) - 1) << colShift; + uint64_t colAddr = (DecLogic & colMask) >> (colShift - uintLog2(static_cast<uint64_t>(BC) * burstLength)); PhysicalAddr.colAddr = colAddr; } else { - unsigned colBits = static_cast<unsigned>(log2(nColumns)); - unsigned bankShift = static_cast<unsigned>(colBits + ((BI > 1) ? log2(BI) : 0)); - unsigned bankMask = static_cast<unsigned>(nBanks / BI - 1) << bankShift; - unsigned bankAddr = (DecLogic & bankMask) >> colBits; + uint64_t colBits = uintLog2(nColumns); + uint64_t bankShift = colBits + ((BI > 1) ? uintLog2(BI) : 0); + uint64_t bankMask = (nBanks / BI - 1) << bankShift; + uint64_t bankAddr = (DecLogic & bankMask) >> colBits; PhysicalAddr.bankAddr = bankAddr; - unsigned colShift = static_cast<unsigned>(log2(BC * burstLength) + - ((BI > 1) ? log2(BI) : 0)); - unsigned colMask = static_cast<unsigned>(nColumns / (BC * burstLength) - 1) - << colShift; - unsigned colAddr = (DecLogic & colMask) >> - static_cast<unsigned>((colShift - log2(static_cast<unsigned>(BC) * burstLength))); + uint64_t colShift = (uintLog2(BC * burstLength) + ((BI > 1) ? uintLog2(BI) : 0)); + uint64_t colMask = (nColumns / (BC * burstLength) - 1) << colShift; + uint64_t colAddr = (DecLogic & colMask) >> (colShift - uintLog2(BC * burstLength)); PhysicalAddr.colAddr = colAddr; PhysicalAddr.bankGroupAddr = 0; } - unsigned rowShift = static_cast<unsigned>(log2(nColumns * nBanks)); - unsigned rowMask = static_cast<unsigned>(memSpec.memArchSpec.nbrOfRows - 1) - << rowShift; - unsigned rowAddr = (DecLogic & rowMask) >> rowShift; + uint64_t rowShift = uintLog2(nColumns * nBanks); + uint64_t rowMask = (memSpec.memArchSpec.nbrOfRows - 1) << rowShift; + uint64_t rowAddr = (DecLogic & rowMask) >> rowShift; PhysicalAddr.rowAddr = rowAddr; return PhysicalAddr; } // cmdScheduler::memoryMap + +uint64_t cmdScheduler::uintLog2(uint64_t in) +{ + return static_cast<uint64_t>(log2(in)); +}
\ No newline at end of file diff --git a/ext/drampower/src/CmdScheduler.h b/ext/drampower/src/CmdScheduler.h index 3c60ea886..58efd279b 100644 --- a/ext/drampower/src/CmdScheduler.h +++ b/ext/drampower/src/CmdScheduler.h @@ -59,9 +59,9 @@ class cmdScheduler { // the format of a transaction. class trans { public: - int type; - double timeStamp; - unsigned logicalAddress; + int64_t type; + int64_t timeStamp; + uint64_t logicalAddress; }; std::vector<trans> transTrace; // to store the transactions. @@ -69,18 +69,18 @@ class cmdScheduler { // the format of physical address. class physicalAddr { public: - unsigned rowAddr; - unsigned bankAddr; - unsigned bankGroupAddr; - unsigned colAddr; + uint64_t rowAddr; + uint64_t bankAddr; + uint64_t bankGroupAddr; + uint64_t colAddr; }; // the format of a command. class commandItem { public: - int Type; - int bank; - double time; + int64_t Type; + int64_t bank; + int64_t time; std::string name; physicalAddr PhysicalAddr; // sorting the commands according to their scheduling time. @@ -107,11 +107,11 @@ class cmdScheduler { std::vector<commandItem> cmdScheduling; std::vector<commandItem> cmdList; unsigned elements; - int BI, BC, BGI; + int64_t BI, BC, BGI; // the function used to translate a transaction into a sequence of // commands which are scheduled to the memory. - void transTranslation(Data::MemorySpecification memSpec, + void transTranslation(const MemorySpecification& memSpec, std::ifstream& trans_trace, int grouping, int interleaving, @@ -119,45 +119,47 @@ class cmdScheduler { int powerdown); // get the transactions by reading the traces. void getTrans(std::ifstream& pwr_trace, - MemorySpecification memSpec); + const MemorySpecification& memSpec); // the initialization function for scheduling. - void schedulingInitialization(MemorySpecification memSpec); + void schedulingInitialization(const MemorySpecification& memSpec); // the function used to schedule commands according to the timing constraints. - void analyticalScheduling(MemorySpecification memSpec); + void analyticalScheduling(const MemorySpecification& memSpec); // translate the logical address into physical address. physicalAddr memoryMap(trans Trans, - MemorySpecification memSpec); + const MemorySpecification& memSpec); // the power down and power up are scheduled by pdScheduling - void pdScheduling(double endTime, - double timer, - MemorySpecification memSpec); + void pdScheduling(int64_t endTime, + int64_t timer, + const MemorySpecification& memSpec); // get the timings for scheduling a precharge since a read or write command // is scheduled. - int getRWTP(int transType, - MemorySpecification memSpec); + int64_t getRWTP(int64_t transType, + const MemorySpecification& memSpec); // get different kind of timing constraints according to the used memory. void getTimingConstraints(bool BGSwitch, - MemorySpecification memSpec, - int PreType, - int CurrentType); + const MemorySpecification& memSpec, + int64_t PreType, + int64_t CurrentType); - double transTime; + uint64_t uintLog2(uint64_t in); + + int64_t transTime; // the flag for power down. 
- int power_down; - int Inselfrefresh; - int tRRD_init; - int tCCD_init; - int tWTR_init; - double tREF; - double tSwitch_init; - double tRWTP; - int bankaccess; - unsigned nBanks; - unsigned nColumns; - unsigned burstLength; - unsigned nbrOfBankGroups; + int64_t power_down; + int64_t Inselfrefresh; + int64_t tRRD_init; + int64_t tCCD_init; + int64_t tWTR_init; + int64_t tREF; + int64_t tSwitch_init; + int64_t tRWTP; + int64_t bankaccess; + int64_t nBanks; + int64_t nColumns; + int64_t burstLength; + int64_t nbrOfBankGroups; bool timingsGet; - double startTime; + int64_t startTime; // the scheduling results for all the transactions are written into // commands which will be used by the power analysis part. diff --git a/ext/drampower/src/CommandAnalysis.cc b/ext/drampower/src/CommandAnalysis.cc index 4dea5c101..e557c2920 100644 --- a/ext/drampower/src/CommandAnalysis.cc +++ b/ext/drampower/src/CommandAnalysis.cc @@ -45,13 +45,34 @@ using namespace Data; using namespace std; -CommandAnalysis::CommandAnalysis() +bool commandSorter(const MemCommand& i, const MemCommand& j) { + if (i.getTimeInt64() == j.getTimeInt64()) { + return i.getType() == MemCommand::PRE && j.getType() != MemCommand::PRE; + } else { + return i.getTimeInt64() < j.getTimeInt64(); + } } -CommandAnalysis::CommandAnalysis(const int nbrofBanks) +CommandAnalysis::CommandAnalysis(const int64_t nbrofBanks) { // Initializing all counters and variables + clearStats(0); + zero = 0; + + bankstate.resize(static_cast<size_t>(nbrofBanks), 0); + last_states.resize(static_cast<size_t>(nbrofBanks)); + mem_state = 0; + num_active_banks = 0; + + cmd_list.clear(); + cached_cmd.clear(); + activation_cycle.resize(static_cast<size_t>(nbrofBanks), 0); +} + +// function to clear counters +void CommandAnalysis::clearStats(const int64_t timestamp) +{ numberofacts = 0; numberofpres = 0; @@ -64,10 +85,6 @@ CommandAnalysis::CommandAnalysis(const int nbrofBanks) s_pre_pdns = 0; numberofsrefs = 0; - pop = 0; - init = 0; - zero = 0; - actcycles = 0; precycles = 0; f_act_pdcycles = 0; @@ -85,28 +102,29 @@ CommandAnalysis::CommandAnalysis(const int nbrofBanks) idlecycles_act = 0; idlecycles_pre = 0; + // reset count references to timestamp so that they are moved + // to start of next stats generation + first_act_cycle = timestamp; + last_pre_cycle = timestamp; + pdn_cycle = timestamp; + sref_cycle = timestamp; + end_act_op = timestamp; + end_read_op = timestamp; + end_write_op = timestamp; + latest_act_cycle = -1; - latest_pre_cycle = -1; latest_read_cycle = -1; latest_write_cycle = -1; - end_read_op = 0; - end_write_op = 0; - end_act_op = 0; - - first_act_cycle = 0; - last_pre_cycle = 0; - bankstate.resize(nbrofBanks, 0); - last_states.resize(nbrofBanks); - mem_state = 0; - - sref_cycle = 0; - pdn_cycle = 0; - - cmd_list.clear(); - full_cmd_list.resize(1, MemCommand::PRE); - cached_cmd.clear(); - activation_cycle.resize(nbrofBanks, 0); + if (timestamp == 0) { + // set to -1 at beginning of simulation + latest_pre_cycle = -1; + } else { + // NOTE: reference is adjusted by tRP (PRE delay) when updating counter + // could remove tRP to ensure counter starts at beginning of next block; + // currently simply setting to timestamp for simplicity + latest_pre_cycle = timestamp; + } } // function to clear all arrays @@ -114,7 +132,6 @@ void CommandAnalysis::clear() { cached_cmd.clear(); cmd_list.clear(); - full_cmd_list.clear(); last_states.clear(); bankstate.clear(); } @@ -125,132 +142,57 @@ void CommandAnalysis::clear() // issued command timestamp, when the 
auto-precharge would kick in void CommandAnalysis::getCommands(const Data::MemorySpecification& memSpec, - const int nbrofBanks, std::vector<MemCommand>& list, bool lastupdate) + std::vector<MemCommand>& list, bool lastupdate) { - for (vector<MemCommand>::const_iterator i = list.begin(); i != list.end(); ++i) { - const MemCommand& cmd = *i; - cmd_list.push_back(cmd); - + for (size_t i = 0; i < list.size(); ++i) { + MemCommand& cmd = list[i]; MemCommand::cmds cmdType = cmd.getType(); if (cmdType == MemCommand::ACT) { activation_cycle[cmd.getBank()] = cmd.getTimeInt64(); } else if (cmdType == MemCommand::RDA || cmdType == MemCommand::WRA) { // Remove auto-precharge flag from command - cmd_list.back().setType(cmd.typeWithoutAutoPrechargeFlag()); + cmd.setType(cmd.typeWithoutAutoPrechargeFlag()); // Add the auto precharge to the list of cached_cmds int64_t preTime = max(cmd.getTimeInt64() + cmd.getPrechargeOffset(memSpec, cmdType), activation_cycle[cmd.getBank()] + memSpec.memTimingSpec.RAS); - cached_cmd.push_back(MemCommand(MemCommand::PRE, cmd.getBank(), static_cast<double>(preTime))); + list.push_back(MemCommand(MemCommand::PRE, cmd.getBank(), preTime)); } } - pop = 0; - // Note: the extra pre-cmds at the end of the lists, and the cast to double - // of the size vector is probably not desirable. - cmd_list.push_back(MemCommand::PRE); - cached_cmd.push_back(MemCommand::PRE); - analyse_commands(nbrofBanks, memSpec, cmd_list.size()-1, - cached_cmd.size()-1, lastupdate); - cmd_list.clear(); - cached_cmd.clear(); -} // CommandAnalysis::getCommands - -// Checks the auto-precharge cached command list and inserts the explicit -// precharges with the appropriate timestamp in the original command list -// (by merging) based on their offset from the issuing command. Calls the -// evaluate function to analyse this expanded list of commands. + sort(list.begin(), list.end(), commandSorter); -void CommandAnalysis::analyse_commands(const int nbrofBanks, - Data::MemorySpecification memSpec, int64_t nCommands, int64_t nCached, bool lastupdate) -{ - full_cmd_list.resize(1, MemCommand::PRE); - unsigned mCommands = 0; - unsigned mCached = 0; - for (unsigned i = 0; i < nCommands + nCached + 1; i++) { - if (cached_cmd.size() > 1) { - if ((cmd_list[mCommands].getTime() > 1) && (init == 0)) { - full_cmd_list[i].setType(MemCommand::PREA); - init = 1; - pop = 1; - } else { - init = 1; - if ((cached_cmd[mCached].getTime() > 0) && (cmd_list. - at(mCommands).getTime() < cached_cmd[mCached]. - getTime()) && ((cmd_list[mCommands].getTime() > 0) || - ((cmd_list[mCommands].getTime() == 0) && (cmd_list[mCommands]. - getType() != MemCommand::PRE)))) { - full_cmd_list[i] = cmd_list[mCommands]; - mCommands++; - } else if ((cached_cmd[mCached].getTime() > 0) && (cmd_list[mCommands]. - getTime() >= cached_cmd[mCached].getTime())) { - full_cmd_list[i] = cached_cmd[mCached]; - mCached++; - } else if (cached_cmd[mCached].getTime() == 0) { - if ((cmd_list[mCommands].getTime() > 0) || ((cmd_list[mCommands]. - getTime() == 0) && (cmd_list[mCommands]. - getType() != MemCommand::PRE))) { - full_cmd_list[i] = cmd_list[mCommands]; - mCommands++; - } - } else if (cmd_list[mCommands].getTime() == 0) { - full_cmd_list[i] = cached_cmd[mCached]; - mCached++; - } - } - } else { - if ((cmd_list[mCommands].getTime() > 1) && (init == 0)) { - full_cmd_list[i].setType(MemCommand::PREA); - init = 1; - pop = 1; - } else { - init = 1; - if ((cmd_list[mCommands].getTime() > 0) || ((cmd_list. - at(mCommands).getTime() == 0) && (cmd_list[mCommands]. 
- getType() != MemCommand::PRE))) { - full_cmd_list[i] = cmd_list[mCommands]; - mCommands++; - } - } - } - full_cmd_list.resize(full_cmd_list.size() + 1, MemCommand::PRE); + if (lastupdate && list.empty() == false) { + // Add cycles at the end of the list + int64_t t = timeToCompletion(memSpec, list.back().getType()) + list.back().getTimeInt64() - 1; + list.push_back(MemCommand(MemCommand::NOP, 0, t)); } - full_cmd_list.pop_back(); - if (pop == 0) { - full_cmd_list.pop_back(); - } - if (lastupdate) { - full_cmd_list.resize(full_cmd_list.size() + 1, MemCommand::NOP); - full_cmd_list[full_cmd_list.size() - 1].setTime(full_cmd_list - [full_cmd_list.size() - 2].getTime() + timeToCompletion(memSpec, - full_cmd_list[full_cmd_list.size() - 2].getType()) - 1); - } + evaluate(memSpec, list); +} // CommandAnalysis::getCommands - evaluate(memSpec, full_cmd_list, nbrofBanks); -} // CommandAnalysis::analyse_commands // To get the time of completion of the issued command // Derived based on JEDEC specifications -int CommandAnalysis::timeToCompletion(const MemorySpecification& +int64_t CommandAnalysis::timeToCompletion(const MemorySpecification& memSpec, MemCommand::cmds type) { - int offset = 0; + int64_t offset = 0; const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; if (type == MemCommand::RD) { - offset = static_cast<int>(memTimingSpec.RL + + offset = memTimingSpec.RL + memTimingSpec.DQSCK + 1 + (memArchSpec.burstLength / - memArchSpec.dataRate)); + memArchSpec.dataRate); } else if (type == MemCommand::WR) { - offset = static_cast<int>(memTimingSpec.WL + + offset = memTimingSpec.WL + (memArchSpec.burstLength / memArchSpec.dataRate) + - memTimingSpec.WR); + memTimingSpec.WR; } else if (type == MemCommand::ACT) { - offset = static_cast<int>(memTimingSpec.RCD); + offset = memTimingSpec.RCD; } else if ((type == MemCommand::PRE) || (type == MemCommand::PREA)) { - offset = static_cast<int>(memTimingSpec.RP); + offset = memTimingSpec.RP; } return offset; } // CommandAnalysis::timeToCompletion @@ -258,38 +200,39 @@ int CommandAnalysis::timeToCompletion(const MemorySpecification& // Used to analyse a given list of commands and identify command timings // and memory state transitions void CommandAnalysis::evaluate(const MemorySpecification& memSpec, - vector<MemCommand>& cmd_list, int nbrofBanks) + vector<MemCommand>& cmd_list) { // for each command identify timestamp, type and bank - for (unsigned cmd_list_counter = 0; cmd_list_counter < cmd_list.size(); - cmd_list_counter++) { + for (auto cmd : cmd_list) { // For command type - int type = cmd_list[cmd_list_counter].getType(); + int type = cmd.getType(); // For command bank - int bank = cmd_list[cmd_list_counter].getBank(); + int bank = static_cast<int>(cmd.getBank()); // Command Issue timestamp in clock cycles (cc) - int64_t timestamp = cmd_list[cmd_list_counter].getTimeInt64(); + int64_t timestamp = cmd.getTimeInt64(); if (type == MemCommand::ACT) { + printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank); // If command is ACT - update number of acts, bank state of the // target bank, first and latest activation cycle and the memory // state. Update the number of precharged/idle-precharged cycles. 
numberofacts++; - if (bankstate[bank] == 1) { + if (bankstate[static_cast<size_t>(bank)] == 1) { printWarning("Bank is already active!", type, timestamp, bank); } - bankstate[bank] = 1; - if (mem_state == 0) { + bankstate[static_cast<size_t>(bank)] = 1; + if (num_active_banks == 0) { first_act_cycle = timestamp; precycles += max(zero, timestamp - last_pre_cycle); idle_pre_update(memSpec, timestamp, latest_pre_cycle); } latest_act_cycle = timestamp; - mem_state++; + num_active_banks++; } else if (type == MemCommand::RD) { + printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank); // If command is RD - update number of reads and read cycle. Check // for active idle cycles (if any). - if (bankstate[bank] == 0) { + if (bankstate[static_cast<size_t>(bank)] == 0) { printWarning("Bank is not active!", type, timestamp, bank); } numberofreads++; @@ -297,9 +240,10 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec, latest_act_cycle, timestamp); latest_read_cycle = timestamp; } else if (type == MemCommand::WR) { + printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank); // If command is WR - update number of writes and write cycle. Check // for active idle cycles (if any). - if (bankstate[bank] == 0) { + if (bankstate[static_cast<size_t>(bank)] == 0) { printWarning("Bank is not active!", type, timestamp, bank); } numberofwrites++; @@ -307,6 +251,7 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec, latest_act_cycle, timestamp); latest_write_cycle = timestamp; } else if (type == MemCommand::REF) { + printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank); // If command is REF - update number of refreshes, set bank state of // all banks to ACT, set the last PRE cycles at RFC-RP cycles from // timestamp, set the number of active cycles to RFC-RP and check @@ -321,56 +266,54 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec, memSpec.memTimingSpec.RP; latest_pre_cycle = last_pre_cycle; actcycles += memSpec.memTimingSpec.RFC - memSpec.memTimingSpec.RP; - mem_state = 0; - for (int j = 0; j < nbrofBanks; j++) { - bankstate[j] = 0; + num_active_banks = 0; + for (auto& b : bankstate) { + b = 0; } } else if (type == MemCommand::PRE) { + printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank); // If command is explicit PRE - update number of precharges, bank // state of the target bank and last and latest precharge cycle. // Calculate the number of active cycles if the memory was in the // active state before, but there is a state transition to PRE now. // If not, update the number of precharged cycles and idle cycles. // Update memory state if needed. 
- if (bankstate[bank] == 1) { + if (bankstate[static_cast<size_t>(bank)] == 1) { numberofpres++; } - bankstate[bank] = 0; + bankstate[static_cast<size_t>(bank)] = 0; - if (mem_state == 1) { + if (num_active_banks == 1) { actcycles += max(zero, timestamp - first_act_cycle); last_pre_cycle = timestamp; idle_act_update(memSpec, latest_read_cycle, latest_write_cycle, latest_act_cycle, timestamp); - } else if (mem_state == 0) { + } else if (num_active_banks == 0) { precycles += max(zero, timestamp - last_pre_cycle); idle_pre_update(memSpec, timestamp, latest_pre_cycle); last_pre_cycle = timestamp; } latest_pre_cycle = timestamp; - if (mem_state > 0) { - mem_state--; + if (num_active_banks > 0) { + num_active_banks--; } else { - mem_state = 0; + num_active_banks = 0; } } else if (type == MemCommand::PREA) { + printWarningIfPoweredDown("Command issued while in power-down mode.", type, timestamp, bank); // If command is explicit PREA (precharge all banks) - update // number of precharges by the number of banks, update the bank // state of all banks to PRE and set the precharge cycle. // Calculate the number of active cycles if the memory was in the // active state before, but there is a state transition to PRE now. // If not, update the number of precharged cycles and idle cycles. - if (timestamp == 0) { - numberofpres += 0; - } else { - numberofpres += mem_state; - } + numberofpres += num_active_banks; - if (mem_state > 0) { + if (num_active_banks > 0) { actcycles += max(zero, timestamp - first_act_cycle); idle_act_update(memSpec, latest_read_cycle, latest_write_cycle, latest_act_cycle, timestamp); - } else if (mem_state == 0) { + } else if (num_active_banks == 0) { precycles += max(zero, timestamp - last_pre_cycle); idle_pre_update(memSpec, timestamp, latest_pre_cycle); } @@ -378,10 +321,10 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec, latest_pre_cycle = timestamp; last_pre_cycle = timestamp; - mem_state = 0; + num_active_banks = 0; - for (int j = 0; j < nbrofBanks; j++) { - bankstate[j] = 0; + for (auto& b : bankstate) { + b = 0; } } else if (type == MemCommand::PDN_F_ACT) { // If command is fast-exit active power-down - update number of @@ -391,9 +334,7 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec, // after powering-up. Update active and active idle cycles. printWarningIfNotActive("All banks are precharged! Incorrect use of Active Power-Down.", type, timestamp, bank); f_act_pdns++; - for (int j = 0; j < nbrofBanks; j++) { - last_states[j] = bankstate[j]; - } + last_states = bankstate; pdn_cycle = timestamp; actcycles += max(zero, timestamp - first_act_cycle); idle_act_update(memSpec, latest_read_cycle, latest_write_cycle, @@ -407,9 +348,7 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec, // after powering-up. Update active and active idle cycles. printWarningIfNotActive("All banks are precharged! 
Incorrect use of Active Power-Down.", type, timestamp, bank); s_act_pdns++; - for (int j = 0; j < nbrofBanks; j++) { - last_states[j] = bankstate[j]; - } + last_states = bankstate; pdn_cycle = timestamp; actcycles += max(zero, timestamp - first_act_cycle); idle_act_update(memSpec, latest_read_cycle, latest_write_cycle, @@ -461,14 +400,14 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec, memSpec.memTimingSpec.XPDLL - (2 * memSpec.memTimingSpec.RCD)); } - } else if ((mem_state != CommandAnalysis::MS_PDN_S_ACT) || (mem_state != - CommandAnalysis::MS_PDN_F_ACT)) { + } else if (mem_state != CommandAnalysis::MS_PDN_S_ACT || mem_state != CommandAnalysis::MS_PDN_F_ACT) { cerr << "Incorrect use of Active Power-Up!" << endl; } + num_active_banks = 0; mem_state = 0; - for (int j = 0; j < nbrofBanks; j++) { - bankstate[j] = last_states[j]; - mem_state += last_states[j]; + bankstate = last_states; + for (auto& a : last_states) { + num_active_banks += static_cast<unsigned int>(a); } first_act_cycle = timestamp; } else if (type == MemCommand::PUP_PRE) { @@ -493,11 +432,11 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec, memSpec.memTimingSpec.XPDLL - memSpec.memTimingSpec.RCD - memSpec.memTimingSpec.RP); } - } else if ((mem_state != CommandAnalysis::MS_PDN_S_PRE) || (mem_state != - CommandAnalysis::MS_PDN_F_PRE)) { + } else if (mem_state != CommandAnalysis::MS_PDN_S_PRE || mem_state != CommandAnalysis::MS_PDN_F_PRE) { cerr << "Incorrect use of Precharged Power-Up!" << endl; } mem_state = 0; + num_active_banks = 0; last_pre_cycle = timestamp; } else if (type == MemCommand::SREN) { // If command is self-refresh - update number of self-refreshes, @@ -583,14 +522,15 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec, } } mem_state = 0; - } else if ((type == MemCommand::END) || (type == MemCommand::NOP)) { + num_active_banks = 0; + } else if (type == MemCommand::END || type == MemCommand::NOP) { // May be optionally used at the end of memory trace for better accuracy // Update all counters based on completion of operations. 
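As a side note on the bookkeeping above: the overloaded mem_state bank counter has been split into an explicit num_active_banks count plus the per-bank bankstate vector, and a PREA now simply counts one precharge per currently active bank. A toy illustration of that accounting (warnings, cycle counters and power-down states are omitted, and unlike the real code this sketch does not count a repeated ACT to an already-active bank):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct BankTracker {
        std::vector<int> bankstate;        // 1 = active, 0 = precharged
        unsigned num_active_banks = 0;
        int64_t  numberofpres = 0;

        explicit BankTracker(size_t nbrOfBanks) : bankstate(nbrOfBanks, 0) {}

        void activate(size_t bank) {
            if (bankstate[bank] == 0) { bankstate[bank] = 1; ++num_active_banks; }
        }

        void precharge(size_t bank) {              // explicit PRE
            if (bankstate[bank] == 1) {
                bankstate[bank] = 0;
                ++numberofpres;
                --num_active_banks;
            }
        }

        void prechargeAll() {                      // PREA
            numberofpres += num_active_banks;      // one PRE per active bank
            std::fill(bankstate.begin(), bankstate.end(), 0);
            num_active_banks = 0;
        }
    };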
- if ((mem_state > 0) && (mem_state < 9)) { + if (num_active_banks > 0 && mem_state == 0) { actcycles += max(zero, timestamp - first_act_cycle); idle_act_update(memSpec, latest_read_cycle, latest_write_cycle, latest_act_cycle, timestamp); - } else if (mem_state == 0) { + } else if (num_active_banks == 0 && mem_state == 0) { precycles += max(zero, timestamp - last_pre_cycle); idle_pre_update(memSpec, timestamp, latest_pre_cycle); } else if (mem_state == CommandAnalysis::MS_PDN_F_ACT) { @@ -604,6 +544,9 @@ void CommandAnalysis::evaluate(const MemorySpecification& memSpec, } else if (mem_state == CommandAnalysis::MS_SREF) { sref_cycles += max(zero, timestamp - sref_cycle); } + } else { + printWarning("Unknown command given, exiting.", type, timestamp, bank); + exit(-1); } } } // CommandAnalysis::evaluate @@ -646,14 +589,21 @@ void CommandAnalysis::idle_pre_update(const MemorySpecification& memSpec, void CommandAnalysis::printWarningIfActive(const string& warning, int type, int64_t timestamp, int bank) { - if (mem_state != 0) { + if (num_active_banks != 0) { printWarning(warning, type, timestamp, bank); } } void CommandAnalysis::printWarningIfNotActive(const string& warning, int type, int64_t timestamp, int bank) { - if (mem_state == 0) { + if (num_active_banks == 0) { + printWarning(warning, type, timestamp, bank); + } +} + +void CommandAnalysis::printWarningIfPoweredDown(const string& warning, int type, int64_t timestamp, int bank) +{ + if (mem_state != 0) { printWarning(warning, type, timestamp, bank); } } diff --git a/ext/drampower/src/CommandAnalysis.h b/ext/drampower/src/CommandAnalysis.h index b5c7ac778..15261fb2f 100644 --- a/ext/drampower/src/CommandAnalysis.h +++ b/ext/drampower/src/CommandAnalysis.h @@ -58,10 +58,8 @@ class CommandAnalysis { MS_PDN_S_PRE = 13, MS_SREF = 14 }; - CommandAnalysis(); - // Returns number of reads, writes, acts, pres and refs in the trace - CommandAnalysis(const int nbrofBanks); + CommandAnalysis(const int64_t nbrofBanks); // Number of activate commands int64_t numberofacts; @@ -117,29 +115,25 @@ class CommandAnalysis { // Number of precharged auto-refresh cycles during self-refresh exit int64_t spup_ref_pre_cycles; + // function for clearing counters + void clearStats(const int64_t timestamp); + // function for clearing arrays void clear(); // To identify auto-precharges void getCommands(const MemorySpecification& memSpec, - const int - nbrofBanks, std::vector<MemCommand>& list, bool lastupdate); private: - unsigned init; int64_t zero; - unsigned pop; // Cached last read command from the file std::vector<MemCommand> cached_cmd; // Stores the memory commands for analysis std::vector<MemCommand> cmd_list; - // Stores all memory commands for analysis - std::vector<MemCommand> full_cmd_list; - // To save states of the different banks, before entering active // power-down mode (slow/fast-exit). 
std::vector<int> last_states; @@ -171,26 +165,20 @@ class CommandAnalysis { // Memory State unsigned mem_state; + unsigned num_active_banks; // Clock cycle of first activate command when memory state changes to ACT int64_t first_act_cycle; // Clock cycle of last precharge command when memory state changes to PRE int64_t last_pre_cycle; - // To collect and analyse all commands including auto-precharges - void analyse_commands(const int nbrofBanks, - Data::MemorySpecification - memSpec, - int64_t nCommands, - int64_t nCached, - bool lastupdate); + // To perform timing analysis of a given set of commands and update command counters void evaluate(const MemorySpecification& memSpec, - std::vector<MemCommand>& cmd_list, - int nbrofBanks); + std::vector<MemCommand>& cmd_list); // To calculate time of completion of any issued command - int timeToCompletion(const MemorySpecification& memSpec, + int64_t timeToCompletion(const MemorySpecification& memSpec, MemCommand::cmds type); // To update idle period information whenever active cycles may be idle @@ -207,6 +195,7 @@ class CommandAnalysis { void printWarningIfActive(const std::string& warning, int type, int64_t timestamp, int bank); void printWarningIfNotActive(const std::string& warning, int type, int64_t timestamp, int bank); + void printWarningIfPoweredDown(const std::string& warning, int type, int64_t timestamp, int bank); void printWarning(const std::string& warning, int type, int64_t timestamp, int bank); }; } diff --git a/ext/drampower/src/MemArchitectureSpec.h b/ext/drampower/src/MemArchitectureSpec.h index ca79edc91..49eddc8ac 100644 --- a/ext/drampower/src/MemArchitectureSpec.h +++ b/ext/drampower/src/MemArchitectureSpec.h @@ -31,13 +31,15 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * Authors: Karthik Chandrasekar + * Authors: Karthik Chandrasekar, Sven Goossens * */ #ifndef TOOLS_MEM_ARCHITECTURE_SPEC_H #define TOOLS_MEM_ARCHITECTURE_SPEC_H +#include <stdint.h> + #include "Parametrisable.h" namespace Data { @@ -46,14 +48,14 @@ class MemArchitectureSpec : public virtual Parametrisable { MemArchitectureSpec(); void processParameters(); - unsigned int burstLength; - unsigned nbrOfBanks; - unsigned nbrOfRanks; - unsigned dataRate; - unsigned nbrOfColumns; - unsigned nbrOfRows; - unsigned width; - unsigned nbrOfBankGroups; + int64_t burstLength; + int64_t nbrOfBanks; + int64_t nbrOfRanks; + int64_t dataRate; + int64_t nbrOfColumns; + int64_t nbrOfRows; + int64_t width; + int64_t nbrOfBankGroups; bool dll; bool twoVoltageDomains; bool termination; diff --git a/ext/drampower/src/MemCommand.cc b/ext/drampower/src/MemCommand.cc index 156716c2f..5e1115e05 100644 --- a/ext/drampower/src/MemCommand.cc +++ b/ext/drampower/src/MemCommand.cc @@ -44,15 +44,9 @@ using namespace Data; using namespace std; -MemCommand::MemCommand() : - type(MemCommand::PRE), - bank(0), - timestamp(0) -{ -} MemCommand::MemCommand(MemCommand::cmds type, - unsigned bank, double timestamp) : + unsigned bank, int64_t timestamp) : type(type), bank(bank), timestamp(timestamp) @@ -80,35 +74,35 @@ unsigned MemCommand::getBank() const } // For auto-precharge with read or write - to calculate cycle of precharge -int MemCommand::getPrechargeOffset(const MemorySpecification& memSpec, +int64_t MemCommand::getPrechargeOffset(const MemorySpecification& memSpec, MemCommand::cmds type) const { - int precharge_offset = 0; + int64_t precharge_offset = 0; - int BL(static_cast<int>(memSpec.memArchSpec.burstLength)); - int RTP(static_cast<int>(memSpec.memTimingSpec.RTP)); - int dataRate(static_cast<int>(memSpec.memArchSpec.dataRate)); - int AL(static_cast<int>(memSpec.memTimingSpec.AL)); - int WL(static_cast<int>(memSpec.memTimingSpec.WL)); - int WR(static_cast<int>(memSpec.memTimingSpec.WR)); - int B = BL/dataRate; + int64_t BL = memSpec.memArchSpec.burstLength; + int64_t RTP = memSpec.memTimingSpec.RTP; + int64_t dataRate = memSpec.memArchSpec.dataRate; + int64_t AL = memSpec.memTimingSpec.AL; + int64_t WL = memSpec.memTimingSpec.WL; + int64_t WR = memSpec.memTimingSpec.WR; + int64_t B = BL/dataRate; const MemoryType::MemoryType_t& memType = memSpec.memoryType; // Read with auto-precharge if (type == MemCommand::RDA) { if (memType == MemoryType::DDR2) { - precharge_offset = B + AL - 2 + max(RTP, 2); + precharge_offset = B + AL - 2 + max(RTP, int64_t(2)); } else if (memType == MemoryType::DDR3) { - precharge_offset = AL + max(RTP, 4); + precharge_offset = AL + max(RTP, int64_t(4)); } else if (memType == MemoryType::DDR4) { precharge_offset = AL + RTP; } else if (memType == MemoryType::LPDDR) { precharge_offset = B; } else if (memType == MemoryType::LPDDR2) { - precharge_offset = B + max(0, RTP - 2); + precharge_offset = B + max(int64_t(0), RTP - 2); } else if (memType == MemoryType::LPDDR3) { - precharge_offset = B + max(0, RTP - 4); + precharge_offset = B + max(int64_t(0), RTP - 4); } else if (memType == MemoryType::WIDEIO_SDR) { precharge_offset = B; } @@ -133,19 +127,14 @@ int MemCommand::getPrechargeOffset(const MemorySpecification& memSpec, return precharge_offset; } // MemCommand::getPrechargeOffset -void MemCommand::setTime(double _timestamp) +void MemCommand::setTime(int64_t _timestamp) { timestamp = _timestamp; } -double MemCommand::getTime() const -{ - return timestamp; -} - int64_t MemCommand::getTimeInt64() 
const { - return static_cast<int64_t>(timestamp); + return timestamp; } MemCommand::cmds MemCommand::typeWithoutAutoPrechargeFlag() const diff --git a/ext/drampower/src/MemCommand.h b/ext/drampower/src/MemCommand.h index ea7164577..9eb751088 100644 --- a/ext/drampower/src/MemCommand.h +++ b/ext/drampower/src/MemCommand.h @@ -86,17 +86,18 @@ class MemCommand { PUP_ACT = 14, SREN = 15, SREX = 16, - NOP = 17 + NOP = 17, + UNINITIALIZED = 18 }; - MemCommand(); +// MemCommand(); MemCommand( // Command Type - MemCommand::cmds type, + MemCommand::cmds type = UNINITIALIZED, // Target Bank unsigned bank = 0, // Command Issue Timestamp (in cc) - double timestamp = 0); + int64_t timestamp = 0L); // Get command type cmds getType() const; @@ -111,16 +112,15 @@ class MemCommand { unsigned getBank() const; // Set timestamp - void setTime(double _timestamp); + void setTime(int64_t _timestamp); // Get timestamp - double getTime() const; int64_t getTimeInt64() const; cmds typeWithoutAutoPrechargeFlag() const; // To calculate precharge offset after read or write with auto-precharge - int getPrechargeOffset(const MemorySpecification& memSpec, + int64_t getPrechargeOffset(const MemorySpecification& memSpec, MemCommand::cmds type) const; // To check for equivalence @@ -136,19 +136,35 @@ class MemCommand { } } - static const unsigned int nCommands = 18; + static const unsigned int nCommands = 19; static std::string* getCommandTypeStrings() { - static std::string type_map[nCommands] = { "ACT", "RD", "WR", "PRE", "REF", - "END", "RDA", "WRA", "PREA", "PDN_F_PRE","PDN_S_PRE", "PDN_F_ACT", - "PDN_S_ACT", "PUP_PRE", "PUP_ACT", "SREN", "SREX", "NOP" }; + static std::string type_map[nCommands] = { "ACT", + "RD", + "WR", + "PRE", + "REF", + "END", + "RDA", + "WRA", + "PREA", + "PDN_F_PRE", + "PDN_S_PRE", + "PDN_F_ACT", + "PDN_S_ACT", + "PUP_PRE", + "PUP_ACT", + "SREN", + "SREX", + "NOP", + "UNINITIALIZED" }; return type_map; } // To identify command type from name - static cmds getTypeFromName(const std::string name) + static cmds getTypeFromName(const std::string& name) { std::string* typeStrings = getCommandTypeStrings(); @@ -165,7 +181,7 @@ class MemCommand { private: MemCommand::cmds type; unsigned bank; - double timestamp; + int64_t timestamp; }; } #endif // ifndef MEMCOMMAND_H diff --git a/ext/drampower/src/MemTimingSpec.h b/ext/drampower/src/MemTimingSpec.h index 1c3a80c6e..104bf5c71 100644 --- a/ext/drampower/src/MemTimingSpec.h +++ b/ext/drampower/src/MemTimingSpec.h @@ -31,10 +31,12 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
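For reference, the read-with-auto-precharge offsets computed in getPrechargeOffset() above (now pure int64_t cycle arithmetic) reduce to the following per memory type, with B = burstLength / dataRate. This is only a restatement of the RDA cases visible in this hunk, not the library function itself:

    #include <algorithm>
    #include <cstdint>

    enum class Mem { DDR2, DDR3, DDR4, LPDDR, LPDDR2, LPDDR3, WIDEIO_SDR };

    // Cycles from an RDA command until its implicit precharge may be issued.
    int64_t rdaPrechargeOffset(Mem m, int64_t BL, int64_t dataRate,
                               int64_t AL, int64_t RTP)
    {
        const int64_t B = BL / dataRate;
        switch (m) {
          case Mem::DDR2:       return B + AL - 2 + std::max<int64_t>(RTP, 2);
          case Mem::DDR3:       return AL + std::max<int64_t>(RTP, 4);
          case Mem::DDR4:       return AL + RTP;
          case Mem::LPDDR:      return B;
          case Mem::LPDDR2:     return B + std::max<int64_t>(0, RTP - 2);
          case Mem::LPDDR3:     return B + std::max<int64_t>(0, RTP - 4);
          case Mem::WIDEIO_SDR: return B;
        }
        return 0;
    }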
* - * Authors: Karthik Chandrasekar + * Authors: Karthik Chandrasekar, Sven Goossens * */ +#include <stdint.h> + #include "Parametrisable.h" namespace Data { @@ -44,35 +46,35 @@ class MemTimingSpec : public virtual Parametrisable { void processParameters(); double clkMhz; - unsigned RC; - unsigned RCD; - unsigned CCD; - unsigned CCD_S; - unsigned CCD_L; - unsigned RRD; - unsigned RRD_S; - unsigned RRD_L; - unsigned FAW; - unsigned TAW; - unsigned WTR; - unsigned WTR_S; - unsigned WTR_L; - unsigned REFI; - unsigned RL; - unsigned RP; - unsigned RFC; - unsigned RAS; - unsigned WL; - unsigned AL; - unsigned DQSCK; - unsigned RTP; - unsigned WR; - unsigned XP; - unsigned XPDLL; - unsigned XS; - unsigned XSDLL; - unsigned CKE; - unsigned CKESR; + int64_t RC; + int64_t RCD; + int64_t CCD; + int64_t CCD_S; + int64_t CCD_L; + int64_t RRD; + int64_t RRD_S; + int64_t RRD_L; + int64_t FAW; + int64_t TAW; + int64_t WTR; + int64_t WTR_S; + int64_t WTR_L; + int64_t REFI; + int64_t RL; + int64_t RP; + int64_t RFC; + int64_t RAS; + int64_t WL; + int64_t AL; + int64_t DQSCK; + int64_t RTP; + int64_t WR; + int64_t XP; + int64_t XPDLL; + int64_t XS; + int64_t XSDLL; + int64_t CKE; + int64_t CKESR; double clkPeriod; }; } diff --git a/ext/drampower/src/MemoryPowerModel.cc b/ext/drampower/src/MemoryPowerModel.cc index 4817d1bb5..e020830e6 100644 --- a/ext/drampower/src/MemoryPowerModel.cc +++ b/ext/drampower/src/MemoryPowerModel.cc @@ -37,23 +37,24 @@ #include "MemoryPowerModel.h" -#include <cmath> // For pow - #include <stdint.h> +#include <cmath> // For pow +#include <iostream> // fmtflags + using namespace std; using namespace Data; // Calculate energy and average power consumption for the given command trace -void MemoryPowerModel::power_calc(MemorySpecification memSpec, - const CommandAnalysis& counters, +void MemoryPowerModel::power_calc(const MemorySpecification& memSpec, + const CommandAnalysis& c, int term) { - MemTimingSpec& t = memSpec.memTimingSpec; - MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; - MemPowerSpec& mps = memSpec.memPowerSpec; + const MemTimingSpec& t = memSpec.memTimingSpec; + const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; + const MemPowerSpec& mps = memSpec.memPowerSpec; energy.act_energy = 0.0; energy.pre_energy = 0.0; @@ -102,16 +103,16 @@ void MemoryPowerModel::power_calc(MemorySpecification memSpec, // 1 DQS and 1 DM pin is associated with every data byte int64_t dqPlusDqsPlusMaskBits = memArchSpec.width + memArchSpec.width / 8 + memArchSpec.width / 8; // Size of one clock period for the data bus. 
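A quick numeric illustration of the I/O pin-count and data-bus period terms used by power_calc() above, assuming (purely as an example) a x16 interface at double data rate with a 1.25 ns clock:

    #include <cstdint>
    #include <iostream>

    int main()
    {
        const int64_t width     = 16;    // DQ pins (example x16 device)
        const int64_t dataRate  = 2;     // transfers per clock (DDR)
        const double  clkPeriod = 1.25;  // ns (example)

        // One DQS and one DM pin per data byte, as in power_calc() above.
        const int64_t dqPlusDqsBits         = width + width / 8;             // 18
        const int64_t dqPlusDqsPlusMaskBits = width + width / 8 + width / 8; // 20

        // Size of one clock period for the data bus.
        const double ddrPeriod = clkPeriod / static_cast<double>(dataRate);  // 0.625 ns

        std::cout << dqPlusDqsBits << ' ' << dqPlusDqsPlusMaskBits << ' '
                  << ddrPeriod << std::endl;
        return 0;
    }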
- double ddrPeriod = t.clkPeriod / memArchSpec.dataRate; + double ddrPeriod = t.clkPeriod / static_cast<double>(memArchSpec.dataRate); // Read IO power is consumed by each DQ (data) and DQS (data strobe) pin - energy.read_io_energy = calcIoTermEnergy(counters.numberofreads * memArchSpec.burstLength, + energy.read_io_energy = calcIoTermEnergy(c.numberofreads * memArchSpec.burstLength, ddrPeriod, power.IO_power, dqPlusDqsBits); // Write ODT power is consumed by each DQ (data), DQS (data strobe) and DM - energy.write_term_energy = calcIoTermEnergy(counters.numberofwrites * memArchSpec.burstLength, + energy.write_term_energy = calcIoTermEnergy(c.numberofwrites * memArchSpec.burstLength, ddrPeriod, power.WR_ODT_power, dqPlusDqsPlusMaskBits); @@ -119,14 +120,14 @@ void MemoryPowerModel::power_calc(MemorySpecification memSpec, if (memArchSpec.nbrOfRanks > 1) { // Termination power consumed in the idle rank during reads on the active // rank by each DQ (data) and DQS (data strobe) pin. - energy.read_oterm_energy = calcIoTermEnergy(counters.numberofreads * memArchSpec.burstLength, + energy.read_oterm_energy = calcIoTermEnergy(c.numberofreads * memArchSpec.burstLength, ddrPeriod, power.TermRD_power, dqPlusDqsBits); // Termination power consumed in the idle rank during writes on the active // rank by each DQ (data), DQS (data strobe) and DM (data mask) pin. - energy.write_oterm_energy = calcIoTermEnergy(counters.numberofwrites * memArchSpec.burstLength, + energy.write_oterm_energy = calcIoTermEnergy(c.numberofwrites * memArchSpec.burstLength, ddrPeriod, power.TermWR_power, dqPlusDqsPlusMaskBits); @@ -137,101 +138,101 @@ void MemoryPowerModel::power_calc(MemorySpecification memSpec, + energy.read_oterm_energy + energy.write_oterm_energy; } - total_cycles = counters.actcycles + counters.precycles + - counters.f_act_pdcycles + counters.f_pre_pdcycles + - counters.s_act_pdcycles + counters.s_pre_pdcycles + counters.sref_cycles - + counters.sref_ref_act_cycles + counters.sref_ref_pre_cycles + - counters.spup_ref_act_cycles + counters.spup_ref_pre_cycles; + total_cycles = c.actcycles + c.precycles + + c.f_act_pdcycles + c.f_pre_pdcycles + + c.s_act_pdcycles + c.s_pre_pdcycles + c.sref_cycles + + c.sref_ref_act_cycles + c.sref_ref_pre_cycles + + c.spup_ref_act_cycles + c.spup_ref_pre_cycles; EnergyDomain vdd0Domain(mps.vdd, t.clkPeriod); - energy.act_energy = vdd0Domain.calcTivEnergy(counters.numberofacts * t.RAS , mps.idd0 - mps.idd3n); - energy.pre_energy = vdd0Domain.calcTivEnergy(counters.numberofpres * (t.RC - t.RAS) , mps.idd0 - mps.idd2n); - energy.read_energy = vdd0Domain.calcTivEnergy(counters.numberofreads * burstCc , mps.idd4r - mps.idd3n); - energy.write_energy = vdd0Domain.calcTivEnergy(counters.numberofwrites * burstCc , mps.idd4w - mps.idd3n); - energy.ref_energy = vdd0Domain.calcTivEnergy(counters.numberofrefs * t.RFC , mps.idd5 - mps.idd3n); - energy.pre_stdby_energy = vdd0Domain.calcTivEnergy(counters.precycles, mps.idd2n); - energy.act_stdby_energy = vdd0Domain.calcTivEnergy(counters.actcycles, mps.idd3n); + energy.act_energy = vdd0Domain.calcTivEnergy(c.numberofacts * t.RAS , mps.idd0 - mps.idd3n); + energy.pre_energy = vdd0Domain.calcTivEnergy(c.numberofpres * (t.RC - t.RAS) , mps.idd0 - mps.idd2n); + energy.read_energy = vdd0Domain.calcTivEnergy(c.numberofreads * burstCc , mps.idd4r - mps.idd3n); + energy.write_energy = vdd0Domain.calcTivEnergy(c.numberofwrites * burstCc , mps.idd4w - mps.idd3n); + energy.ref_energy = vdd0Domain.calcTivEnergy(c.numberofrefs * t.RFC , mps.idd5 - 
mps.idd3n); + energy.pre_stdby_energy = vdd0Domain.calcTivEnergy(c.precycles, mps.idd2n); + energy.act_stdby_energy = vdd0Domain.calcTivEnergy(c.actcycles, mps.idd3n); // Idle energy in the active standby clock cycles - energy.idle_energy_act = vdd0Domain.calcTivEnergy(counters.idlecycles_act, mps.idd3n); + energy.idle_energy_act = vdd0Domain.calcTivEnergy(c.idlecycles_act, mps.idd3n); // Idle energy in the precharge standby clock cycles - energy.idle_energy_pre = vdd0Domain.calcTivEnergy(counters.idlecycles_pre, mps.idd2n); + energy.idle_energy_pre = vdd0Domain.calcTivEnergy(c.idlecycles_pre, mps.idd2n); // fast-exit active power-down cycles energy - energy.f_act_pd_energy = vdd0Domain.calcTivEnergy(counters.f_act_pdcycles, mps.idd3p1); + energy.f_act_pd_energy = vdd0Domain.calcTivEnergy(c.f_act_pdcycles, mps.idd3p1); // fast-exit precharged power-down cycles energy - energy.f_pre_pd_energy = vdd0Domain.calcTivEnergy(counters.f_pre_pdcycles, mps.idd2p1); + energy.f_pre_pd_energy = vdd0Domain.calcTivEnergy(c.f_pre_pdcycles, mps.idd2p1); // slow-exit active power-down cycles energy - energy.s_act_pd_energy = vdd0Domain.calcTivEnergy(counters.s_act_pdcycles, mps.idd3p0); + energy.s_act_pd_energy = vdd0Domain.calcTivEnergy(c.s_act_pdcycles, mps.idd3p0); // slow-exit precharged power-down cycles energy - energy.s_pre_pd_energy = vdd0Domain.calcTivEnergy(counters.s_pre_pdcycles, mps.idd2p0); + energy.s_pre_pd_energy = vdd0Domain.calcTivEnergy(c.s_pre_pdcycles, mps.idd2p0); // self-refresh cycles energy including a refresh per self-refresh entry energy.sref_energy = engy_sref(mps.idd6, mps.idd3n, mps.idd5, mps.vdd, - static_cast<double>(counters.sref_cycles), static_cast<double>(counters.sref_ref_act_cycles), - static_cast<double>(counters.sref_ref_pre_cycles), static_cast<double>(counters.spup_ref_act_cycles), - static_cast<double>(counters.spup_ref_pre_cycles), t.clkPeriod); + static_cast<double>(c.sref_cycles), static_cast<double>(c.sref_ref_act_cycles), + static_cast<double>(c.sref_ref_pre_cycles), static_cast<double>(c.spup_ref_act_cycles), + static_cast<double>(c.spup_ref_pre_cycles), t.clkPeriod); // background energy during active auto-refresh cycles in self-refresh - energy.sref_ref_act_energy = vdd0Domain.calcTivEnergy(counters.sref_ref_act_cycles, mps.idd3p0); + energy.sref_ref_act_energy = vdd0Domain.calcTivEnergy(c.sref_ref_act_cycles, mps.idd3p0); // background energy during precharged auto-refresh cycles in self-refresh - energy.sref_ref_pre_energy = vdd0Domain.calcTivEnergy(counters.sref_ref_pre_cycles, mps.idd2p0); + energy.sref_ref_pre_energy = vdd0Domain.calcTivEnergy(c.sref_ref_pre_cycles, mps.idd2p0); // background energy during active auto-refresh cycles in self-refresh exit - energy.spup_ref_act_energy = vdd0Domain.calcTivEnergy(counters.spup_ref_act_cycles, mps.idd3n); + energy.spup_ref_act_energy = vdd0Domain.calcTivEnergy(c.spup_ref_act_cycles, mps.idd3n); // background energy during precharged auto-refresh cycles in self-refresh exit - energy.spup_ref_pre_energy = vdd0Domain.calcTivEnergy(counters.spup_ref_pre_cycles, mps.idd2n); + energy.spup_ref_pre_energy = vdd0Domain.calcTivEnergy(c.spup_ref_pre_cycles, mps.idd2n); // self-refresh power-up cycles energy -- included - energy.spup_energy = vdd0Domain.calcTivEnergy(counters.spup_cycles, mps.idd2n); + energy.spup_energy = vdd0Domain.calcTivEnergy(c.spup_cycles, mps.idd2n); // active power-up cycles energy - same as active standby -- included - energy.pup_act_energy = vdd0Domain.calcTivEnergy(counters.pup_act_cycles, 
mps.idd3n); + energy.pup_act_energy = vdd0Domain.calcTivEnergy(c.pup_act_cycles, mps.idd3n); // precharged power-up cycles energy - same as precharged standby -- included - energy.pup_pre_energy = vdd0Domain.calcTivEnergy(counters.pup_pre_cycles, mps.idd2n); + energy.pup_pre_energy = vdd0Domain.calcTivEnergy(c.pup_pre_cycles, mps.idd2n); // similar equations as before to support multiple voltage domains in LPDDR2 // and WIDEIO memories if (memArchSpec.twoVoltageDomains) { EnergyDomain vdd2Domain(mps.vdd2, t.clkPeriod); - energy.act_energy += vdd2Domain.calcTivEnergy(counters.numberofacts * t.RAS , mps.idd02 - mps.idd3n2); - energy.pre_energy += vdd2Domain.calcTivEnergy(counters.numberofpres * (t.RC - t.RAS) , mps.idd02 - mps.idd2n2); - energy.read_energy += vdd2Domain.calcTivEnergy(counters.numberofreads * burstCc , mps.idd4r2 - mps.idd3n2); - energy.write_energy += vdd2Domain.calcTivEnergy(counters.numberofwrites * burstCc , mps.idd4w2 - mps.idd3n2); - energy.ref_energy += vdd2Domain.calcTivEnergy(counters.numberofrefs * t.RFC , mps.idd52 - mps.idd3n2); - energy.pre_stdby_energy += vdd2Domain.calcTivEnergy(counters.precycles, mps.idd2n2); - energy.act_stdby_energy += vdd2Domain.calcTivEnergy(counters.actcycles, mps.idd3n2); + energy.act_energy += vdd2Domain.calcTivEnergy(c.numberofacts * t.RAS , mps.idd02 - mps.idd3n2); + energy.pre_energy += vdd2Domain.calcTivEnergy(c.numberofpres * (t.RC - t.RAS) , mps.idd02 - mps.idd2n2); + energy.read_energy += vdd2Domain.calcTivEnergy(c.numberofreads * burstCc , mps.idd4r2 - mps.idd3n2); + energy.write_energy += vdd2Domain.calcTivEnergy(c.numberofwrites * burstCc , mps.idd4w2 - mps.idd3n2); + energy.ref_energy += vdd2Domain.calcTivEnergy(c.numberofrefs * t.RFC , mps.idd52 - mps.idd3n2); + energy.pre_stdby_energy += vdd2Domain.calcTivEnergy(c.precycles, mps.idd2n2); + energy.act_stdby_energy += vdd2Domain.calcTivEnergy(c.actcycles, mps.idd3n2); // Idle energy in the active standby clock cycles - energy.idle_energy_act += vdd2Domain.calcTivEnergy(counters.idlecycles_act, mps.idd3n2); + energy.idle_energy_act += vdd2Domain.calcTivEnergy(c.idlecycles_act, mps.idd3n2); // Idle energy in the precharge standby clock cycles - energy.idle_energy_pre += vdd2Domain.calcTivEnergy(counters.idlecycles_pre, mps.idd2n2); + energy.idle_energy_pre += vdd2Domain.calcTivEnergy(c.idlecycles_pre, mps.idd2n2); // fast-exit active power-down cycles energy - energy.f_act_pd_energy += vdd2Domain.calcTivEnergy(counters.f_act_pdcycles, mps.idd3p12); + energy.f_act_pd_energy += vdd2Domain.calcTivEnergy(c.f_act_pdcycles, mps.idd3p12); // fast-exit precharged power-down cycles energy - energy.f_pre_pd_energy += vdd2Domain.calcTivEnergy(counters.f_pre_pdcycles, mps.idd2p12); + energy.f_pre_pd_energy += vdd2Domain.calcTivEnergy(c.f_pre_pdcycles, mps.idd2p12); // slow-exit active power-down cycles energy - energy.s_act_pd_energy += vdd2Domain.calcTivEnergy(counters.s_act_pdcycles, mps.idd3p02); + energy.s_act_pd_energy += vdd2Domain.calcTivEnergy(c.s_act_pdcycles, mps.idd3p02); // slow-exit precharged power-down cycles energy - energy.s_pre_pd_energy += vdd2Domain.calcTivEnergy(counters.s_pre_pdcycles, mps.idd2p02); + energy.s_pre_pd_energy += vdd2Domain.calcTivEnergy(c.s_pre_pdcycles, mps.idd2p02); energy.sref_energy += engy_sref(mps.idd62, mps.idd3n2, mps.idd52, mps.vdd2, - static_cast<double>(counters.sref_cycles), static_cast<double>(counters.sref_ref_act_cycles), - static_cast<double>(counters.sref_ref_pre_cycles), static_cast<double>(counters.spup_ref_act_cycles), - 
static_cast<double>(counters.spup_ref_pre_cycles), t.clkPeriod); + static_cast<double>(c.sref_cycles), static_cast<double>(c.sref_ref_act_cycles), + static_cast<double>(c.sref_ref_pre_cycles), static_cast<double>(c.spup_ref_act_cycles), + static_cast<double>(c.spup_ref_pre_cycles), t.clkPeriod); // background energy during active auto-refresh cycles in self-refresh - energy.sref_ref_act_energy += vdd2Domain.calcTivEnergy(counters.sref_ref_act_cycles, mps.idd3p02); + energy.sref_ref_act_energy += vdd2Domain.calcTivEnergy(c.sref_ref_act_cycles, mps.idd3p02); // background energy during precharged auto-refresh cycles in self-refresh - energy.sref_ref_pre_energy += vdd2Domain.calcTivEnergy(counters.sref_ref_pre_cycles, mps.idd2p02); + energy.sref_ref_pre_energy += vdd2Domain.calcTivEnergy(c.sref_ref_pre_cycles, mps.idd2p02); // background energy during active auto-refresh cycles in self-refresh exit - energy.spup_ref_act_energy += vdd2Domain.calcTivEnergy(counters.spup_ref_act_cycles, mps.idd3n2); + energy.spup_ref_act_energy += vdd2Domain.calcTivEnergy(c.spup_ref_act_cycles, mps.idd3n2); // background energy during precharged auto-refresh cycles in self-refresh exit - energy.spup_ref_pre_energy += vdd2Domain.calcTivEnergy(counters.spup_ref_pre_cycles, mps.idd2n2); + energy.spup_ref_pre_energy += vdd2Domain.calcTivEnergy(c.spup_ref_pre_cycles, mps.idd2n2); // self-refresh power-up cycles energy -- included - energy.spup_energy += vdd2Domain.calcTivEnergy(counters.spup_cycles, mps.idd2n2); + energy.spup_energy += vdd2Domain.calcTivEnergy(c.spup_cycles, mps.idd2n2); // active power-up cycles energy - same as active standby -- included - energy.pup_act_energy += vdd2Domain.calcTivEnergy(counters.pup_act_cycles, mps.idd3n2); + energy.pup_act_energy += vdd2Domain.calcTivEnergy(c.pup_act_cycles, mps.idd3n2); // precharged power-up cycles energy - same as precharged standby -- included - energy.pup_pre_energy += vdd2Domain.calcTivEnergy(counters.pup_pre_cycles, mps.idd2n2); + energy.pup_pre_energy += vdd2Domain.calcTivEnergy(c.pup_pre_cycles, mps.idd2n2); } // auto-refresh energy during self-refresh cycles @@ -244,7 +245,7 @@ void MemoryPowerModel::power_calc(MemorySpecification memSpec, // energy components for both ranks (in a dual-rank system) energy.total_energy = energy.act_energy + energy.pre_energy + energy.read_energy + energy.write_energy + energy.ref_energy + energy.io_term_energy + - memArchSpec.nbrOfRanks * (energy.act_stdby_energy + + static_cast<double>(memArchSpec.nbrOfRanks) * (energy.act_stdby_energy + energy.pre_stdby_energy + energy.sref_energy + energy.f_act_pd_energy + energy.f_pre_pd_energy + energy.s_act_pd_energy + energy.s_pre_pd_energy + energy.sref_ref_energy + energy.spup_ref_energy); @@ -253,130 +254,100 @@ void MemoryPowerModel::power_calc(MemorySpecification memSpec, power.average_power = energy.total_energy / (static_cast<double>(total_cycles) * t.clkPeriod); } // MemoryPowerModel::power_calc -void MemoryPowerModel::power_print(MemorySpecification memSpec, int term, const CommandAnalysis& counters) const +void MemoryPowerModel::power_print(const MemorySpecification& memSpec, int term, const CommandAnalysis& c) const { - MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; - MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; + const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; + const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; + const uint64_t nRanks = static_cast<uint64_t>(memArchSpec.nbrOfRanks); + const char eUnit[] = " pJ"; + 
ios_base::fmtflags flags = cout.flags(); + streamsize precision = cout.precision(); cout.precision(0); - cout << "* Trace Details:" << endl; - cout << "Number of Activates: " << fixed << counters.numberofacts << endl; - cout << "Number of Reads: " << counters.numberofreads << endl; - cout << "Number of Writes: " << counters.numberofwrites << endl; - cout << "Number of Precharges: " << counters.numberofpres << endl; - cout << "Number of Refreshes: " << counters.numberofrefs << endl; - cout << "Number of Active Cycles: " << counters.actcycles << endl; - cout << " Number of Active Idle Cycles: " << counters.idlecycles_act << endl; - cout << " Number of Active Power-Up Cycles: " << counters.pup_act_cycles << endl; - cout << " Number of Auto-Refresh Active cycles during Self-Refresh " << - "Power-Up: " << counters.spup_ref_act_cycles << endl; - cout << "Number of Precharged Cycles: " << counters.precycles << endl; - cout << " Number of Precharged Idle Cycles: " << counters.idlecycles_pre << endl; - cout << " Number of Precharged Power-Up Cycles: " << counters.pup_pre_cycles - << endl; - cout << " Number of Auto-Refresh Precharged cycles during Self-Refresh" - << " Power-Up: " << counters.spup_ref_pre_cycles << endl; - cout << " Number of Self-Refresh Power-Up Cycles: " << counters.spup_cycles - << endl; - cout << "Total Idle Cycles (Active + Precharged): " << - counters.idlecycles_act + counters.idlecycles_pre << endl; - cout << "Number of Power-Downs: " << counters.f_act_pdns + - counters.s_act_pdns + counters.f_pre_pdns + counters.s_pre_pdns << endl; - cout << " Number of Active Fast-exit Power-Downs: " << counters.f_act_pdns - << endl; - cout << " Number of Active Slow-exit Power-Downs: " << counters.s_act_pdns - << endl; - cout << " Number of Precharged Fast-exit Power-Downs: " << - counters.f_pre_pdns << endl; - cout << " Number of Precharged Slow-exit Power-Downs: " << - counters.s_pre_pdns << endl; - cout << "Number of Power-Down Cycles: " << counters.f_act_pdcycles + - counters.s_act_pdcycles + counters.f_pre_pdcycles + counters.s_pre_pdcycles << endl; - cout << " Number of Active Fast-exit Power-Down Cycles: " << - counters.f_act_pdcycles << endl; - cout << " Number of Active Slow-exit Power-Down Cycles: " << - counters.s_act_pdcycles << endl; - cout << " Number of Auto-Refresh Active cycles during Self-Refresh: " << - counters.sref_ref_act_cycles << endl; - cout << " Number of Precharged Fast-exit Power-Down Cycles: " << - counters.f_pre_pdcycles << endl; - cout << " Number of Precharged Slow-exit Power-Down Cycles: " << - counters.s_pre_pdcycles << endl; - cout << " Number of Auto-Refresh Precharged cycles during Self-Refresh: " << - counters.sref_ref_pre_cycles << endl; - cout << "Number of Auto-Refresh Cycles: " << counters.numberofrefs * - memTimingSpec.RFC << endl; - cout << "Number of Self-Refreshes: " << counters.numberofsrefs << endl; - cout << "Number of Self-Refresh Cycles: " << counters.sref_cycles << endl; - cout << "----------------------------------------" << endl; - cout << "Total Trace Length (clock cycles): " << total_cycles << endl; - cout << "----------------------------------------" << endl; + cout << "* Trace Details:" << fixed << endl + << endl << "#ACT commands: " << c.numberofacts + << endl << "#RD + #RDA commands: " << c.numberofreads + << endl << "#WR + #WRA commands: " << c.numberofwrites + /* #PRE commands (precharge all counts a number of #PRE commands equal to the number of active banks) */ + << endl << "#PRE (+ PREA) commands: " << c.numberofpres + << 
endl << "#REF commands: " << c.numberofrefs + << endl << "#Active Cycles: " << c.actcycles + << endl << " #Active Idle Cycles: " << c.idlecycles_act + << endl << " #Active Power-Up Cycles: " << c.pup_act_cycles + << endl << " #Auto-Refresh Active cycles during Self-Refresh Power-Up: " << c.spup_ref_act_cycles + << endl << "#Precharged Cycles: " << c.precycles + << endl << " #Precharged Idle Cycles: " << c.idlecycles_pre + << endl << " #Precharged Power-Up Cycles: " << c.pup_pre_cycles + << endl << " #Auto-Refresh Precharged cycles during Self-Refresh Power-Up: " << c.spup_ref_pre_cycles + << endl << " #Self-Refresh Power-Up Cycles: " << c.spup_cycles + << endl << "Total Idle Cycles (Active + Precharged): " << c.idlecycles_act + c.idlecycles_pre + << endl << "#Power-Downs: " << c.f_act_pdns + c.s_act_pdns + c.f_pre_pdns + c.s_pre_pdns + << endl << " #Active Fast-exit Power-Downs: " << c.f_act_pdns + << endl << " #Active Slow-exit Power-Downs: " << c.s_act_pdns + << endl << " #Precharged Fast-exit Power-Downs: " << c.f_pre_pdns + << endl << " #Precharged Slow-exit Power-Downs: " << c.s_pre_pdns + << endl << "#Power-Down Cycles: " << c.f_act_pdcycles + c.s_act_pdcycles + c.f_pre_pdcycles + c.s_pre_pdcycles + << endl << " #Active Fast-exit Power-Down Cycles: " << c.f_act_pdcycles + << endl << " #Active Slow-exit Power-Down Cycles: " << c.s_act_pdcycles + << endl << " #Auto-Refresh Active cycles during Self-Refresh: " << c.sref_ref_act_cycles + << endl << " #Precharged Fast-exit Power-Down Cycles: " << c.f_pre_pdcycles + << endl << " #Precharged Slow-exit Power-Down Cycles: " << c.s_pre_pdcycles + << endl << " #Auto-Refresh Precharged cycles during Self-Refresh: " << c.sref_ref_pre_cycles + << endl << "#Auto-Refresh Cycles: " << c.numberofrefs * memTimingSpec.RFC + << endl << "#Self-Refreshes: " << c.numberofsrefs + << endl << "#Self-Refresh Cycles: " << c.sref_cycles + << endl << "----------------------------------------" + << endl << "Total Trace Length (clock cycles): " << total_cycles + << endl << "----------------------------------------" << endl; + cout.precision(2); + cout << endl << "* Trace Power and Energy Estimates:" << endl + << endl << "ACT Cmd Energy: " << energy.act_energy << eUnit + << endl << "PRE Cmd Energy: " << energy.pre_energy << eUnit + << endl << "RD Cmd Energy: " << energy.read_energy << eUnit + << endl << "WR Cmd Energy: " << energy.write_energy << eUnit; - cout << "\n* Trace Power and Energy Estimates:" << endl; - cout << "ACT Cmd Energy: " << energy.act_energy << " pJ" << endl; - cout << "PRE Cmd Energy: " << energy.pre_energy << " pJ" << endl; - cout << "RD Cmd Energy: " << energy.read_energy << " pJ" << endl; - cout << "WR Cmd Energy: " << energy.write_energy << " pJ" << endl; if (term) { - cout << "RD I/O Energy: " << energy.read_io_energy << " pJ" << endl; + cout << "RD I/O Energy: " << energy.read_io_energy << eUnit << endl; // No Termination for LPDDR/2/3 and DDR memories if (memSpec.memArchSpec.termination) { - cout << "WR Termination Energy: " << energy.write_term_energy << " pJ" << endl; + cout << "WR Termination Energy: " << energy.write_term_energy << eUnit << endl; } - if ((memArchSpec.nbrOfRanks > 1) && memSpec.memArchSpec.termination) { - cout << "RD Termination Energy (Idle rank): " << energy.read_oterm_energy - << " pJ" << endl; - cout << "WR Termination Energy (Idle rank): " << energy.write_oterm_energy - << " pJ" << endl; + if (nRanks > 1 && memSpec.memArchSpec.termination) { + cout << "RD Termination Energy (Idle rank): " << 
energy.read_oterm_energy << eUnit + << endl << "WR Termination Energy (Idle rank): " << energy.write_oterm_energy << eUnit << endl; } } - cout << "ACT Stdby Energy: " << memArchSpec.nbrOfRanks * energy.act_stdby_energy << - " pJ" << endl; - cout << " Active Idle Energy: " << memArchSpec.nbrOfRanks * energy.idle_energy_act << - " pJ" << endl; - cout << " Active Power-Up Energy: " << memArchSpec.nbrOfRanks * energy.pup_act_energy << - " pJ" << endl; - cout << " Active Stdby Energy during Auto-Refresh cycles in Self-Refresh" - << " Power-Up: " << memArchSpec.nbrOfRanks * energy.spup_ref_act_energy << - " pJ" << endl; - cout << "PRE Stdby Energy: " << memArchSpec.nbrOfRanks * energy.pre_stdby_energy << - " pJ" << endl; - cout << " Precharge Idle Energy: " << memArchSpec.nbrOfRanks * energy.idle_energy_pre << - " pJ" << endl; - cout << " Precharged Power-Up Energy: " << memArchSpec.nbrOfRanks * energy.pup_pre_energy << - " pJ" << endl; - cout << " Precharge Stdby Energy during Auto-Refresh cycles " << - "in Self-Refresh Power-Up: " << memArchSpec.nbrOfRanks * energy.spup_ref_pre_energy << - " pJ" << endl; - cout << " Self-Refresh Power-Up Energy: " << memArchSpec.nbrOfRanks * energy.spup_energy << - " pJ" << endl; - cout << "Total Idle Energy (Active + Precharged): " << memArchSpec.nbrOfRanks * - (energy.idle_energy_act + energy.idle_energy_pre) << " pJ" << endl; - cout << "Total Power-Down Energy: " << memArchSpec.nbrOfRanks * (energy.f_act_pd_energy + - energy.f_pre_pd_energy + energy.s_act_pd_energy + energy.s_pre_pd_energy) << " pJ" << endl; - cout << " Fast-Exit Active Power-Down Energy: " << memArchSpec.nbrOfRanks * - energy.f_act_pd_energy << " pJ" << endl; - cout << " Slow-Exit Active Power-Down Energy: " << memArchSpec.nbrOfRanks * - energy.s_act_pd_energy << " pJ" << endl; - cout << " Slow-Exit Active Power-Down Energy during Auto-Refresh cycles " - << "in Self-Refresh: " << memArchSpec.nbrOfRanks * energy.sref_ref_act_energy << - " pJ" << endl; - cout << " Fast-Exit Precharged Power-Down Energy: " << memArchSpec.nbrOfRanks * - energy.f_pre_pd_energy << " pJ" << endl; - cout << " Slow-Exit Precharged Power-Down Energy: " << memArchSpec.nbrOfRanks * - energy.s_pre_pd_energy << " pJ" << endl; - cout << " Slow-Exit Precharged Power-Down Energy during Auto-Refresh " << - "cycles in Self-Refresh: " << memArchSpec.nbrOfRanks * energy.sref_ref_pre_energy << - " pJ" << endl; - cout << "Auto-Refresh Energy: " << energy.ref_energy << " pJ" << endl; - cout << "Self-Refresh Energy: " << memArchSpec.nbrOfRanks * energy.sref_energy << - " pJ" << endl; - cout << "----------------------------------------" << endl; - cout << "Total Trace Energy: " << energy.total_energy << " pJ" << endl; - cout << "Average Power: " << power.average_power << " mW" << endl; - cout << "----------------------------------------" << endl; + + double nRanksDouble = static_cast<double>(nRanks); + + cout << "ACT Stdby Energy: " << nRanksDouble * energy.act_stdby_energy << eUnit + << endl << " Active Idle Energy: " << nRanksDouble * energy.idle_energy_act << eUnit + << endl << " Active Power-Up Energy: " << nRanksDouble * energy.pup_act_energy << eUnit + << endl << " Active Stdby Energy during Auto-Refresh cycles in Self-Refresh Power-Up: " << nRanksDouble * energy.spup_ref_act_energy << eUnit + << endl << "PRE Stdby Energy: " << nRanksDouble * energy.pre_stdby_energy << eUnit + << endl << " Precharge Idle Energy: " << nRanksDouble * energy.idle_energy_pre << eUnit + << endl << " Precharged Power-Up Energy: " << nRanksDouble * 
energy.pup_pre_energy << eUnit + << endl << " Precharge Stdby Energy during Auto-Refresh cycles in Self-Refresh Power-Up: " << nRanksDouble * energy.spup_ref_pre_energy << eUnit + << endl << " Self-Refresh Power-Up Energy: " << nRanksDouble * energy.spup_energy << eUnit + << endl << "Total Idle Energy (Active + Precharged): " << nRanksDouble * (energy.idle_energy_act + energy.idle_energy_pre) << eUnit + << endl << "Total Power-Down Energy: " << nRanksDouble * (energy.f_act_pd_energy + energy.f_pre_pd_energy + energy.s_act_pd_energy + energy.s_pre_pd_energy) << eUnit + << endl << " Fast-Exit Active Power-Down Energy: " << nRanksDouble * energy.f_act_pd_energy << eUnit + << endl << " Slow-Exit Active Power-Down Energy: " << nRanksDouble * energy.s_act_pd_energy << eUnit + << endl << " Slow-Exit Active Power-Down Energy during Auto-Refresh cycles in Self-Refresh: " << nRanksDouble * energy.sref_ref_act_energy << eUnit + << endl << " Fast-Exit Precharged Power-Down Energy: " << nRanksDouble * energy.f_pre_pd_energy << eUnit + << endl << " Slow-Exit Precharged Power-Down Energy: " << nRanksDouble * energy.s_pre_pd_energy << eUnit + << endl << " Slow-Exit Precharged Power-Down Energy during Auto-Refresh cycles in Self-Refresh: " << nRanksDouble * energy.sref_ref_pre_energy << eUnit + << endl << "Auto-Refresh Energy: " << energy.ref_energy << eUnit + << endl << "Self-Refresh Energy: " << nRanksDouble * energy.sref_energy << eUnit + << endl << "----------------------------------------" + << endl << "Total Trace Energy: " << energy.total_energy << eUnit + << endl << "Average Power: " << power.average_power << " mW" + << endl << "----------------------------------------" << endl; + + cout.flags(flags); + cout.precision(precision); } // MemoryPowerModel::power_print // Self-refresh active energy estimation (not including background energy) @@ -395,11 +366,11 @@ double MemoryPowerModel::engy_sref(double idd6, double idd3n, double idd5, // IO and Termination power calculation based on Micron Power Calculators // Absolute power measures are obtained from Micron Power Calculator (mentioned in mW) -void MemoryPowerModel::io_term_power(MemorySpecification memSpec) +void MemoryPowerModel::io_term_power(const MemorySpecification& memSpec) { - MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; - MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; - MemPowerSpec& memPowerSpec = memSpec.memPowerSpec; + const MemTimingSpec& memTimingSpec = memSpec.memTimingSpec; + const MemArchitectureSpec& memArchSpec = memSpec.memArchSpec; + const MemPowerSpec& memPowerSpec = memSpec.memPowerSpec; power.IO_power = memPowerSpec.ioPower; // in mW power.WR_ODT_power = memPowerSpec.wrOdtPower; // in mW diff --git a/ext/drampower/src/MemoryPowerModel.h b/ext/drampower/src/MemoryPowerModel.h index b894f67dd..2b2304989 100644 --- a/ext/drampower/src/MemoryPowerModel.h +++ b/ext/drampower/src/MemoryPowerModel.h @@ -46,9 +46,9 @@ class MemoryPowerModel { public: // Calculate energy and average power consumption for the given memory // command trace - void power_calc(MemorySpecification memSpec, - const CommandAnalysis& counters, - int term); + void power_calc(const MemorySpecification& memSpec, + const CommandAnalysis& c, + int term); // Used to calculate self-refresh active energy static double engy_sref(double idd6, @@ -145,12 +145,12 @@ class MemoryPowerModel { }; // Print the power and energy - void power_print(MemorySpecification memSpec, + void power_print(const MemorySpecification& memSpec, int term, - const 
CommandAnalysis& counters) const; + const CommandAnalysis& c) const; // To derive IO and Termination Power measures using DRAM specification - void io_term_power(MemorySpecification memSpec); + void io_term_power(const MemorySpecification& memSpec); Energy energy; Power power; diff --git a/ext/drampower/src/MemorySpecification.h b/ext/drampower/src/MemorySpecification.h index 149d41c28..16d77ef86 100644 --- a/ext/drampower/src/MemorySpecification.h +++ b/ext/drampower/src/MemorySpecification.h @@ -106,7 +106,8 @@ class MemoryType { return val == LPDDR || val == LPDDR2 || val == LPDDR3 || - val == WIDEIO_SDR; + val == WIDEIO_SDR || + val == DDR4; } bool isDDRFamily() const @@ -132,9 +133,11 @@ class MemoryType { double getCapacitance() const { - // LPDDR/2/3 and DDR memories only have IO Power (no ODT) - // Conservative estimates based on Micron Mobile LPDDR2 Power Calculator - // LPDDR/2/3 IO Capacitance in mF + // LPDDR1/2 memories only have IO Power (no ODT) + // LPDDR3 has optional ODT, but it is typically not used (reflections are eliminated by other means (layout)) + // The capacitance values are conservative and based on Micron Mobile LPDDR2 Power Calculator + + // LPDDR/2/3 IO Capacitance in mF if (val == LPDDR) { return 0.0000000045; } else if (val == LPDDR2) { diff --git a/ext/drampower/src/TraceParser.cc b/ext/drampower/src/TraceParser.cc index ec87f06da..2cf9a8572 100644 --- a/ext/drampower/src/TraceParser.cc +++ b/ext/drampower/src/TraceParser.cc @@ -42,14 +42,19 @@ using namespace Data; using namespace std; +TraceParser::TraceParser(int64_t nbrOfBanks) : + counters(nbrOfBanks) +{ +} + + Data::MemCommand TraceParser::parseLine(std::string line) { - MemCommand memcmd; + MemCommand memcmd(MemCommand::UNINITIALIZED, 0, 0); istringstream linestream(line); string item; - double item_val; + int64_t item_val; unsigned itemnum = 0; - MemCommand::cmds type = MemCommand::NOP; // Initialized to prevent warning while (getline(linestream, item, ',')) { if (itemnum == 0) { @@ -62,10 +67,8 @@ Data::MemCommand TraceParser::parseLine(std::string line) } else if (itemnum == 2) { stringstream bank(item); bank >> item_val; - memcmd.setType(type); memcmd.setBank(static_cast<unsigned>(item_val)); } - type = memcmd.getType(); itemnum++; } return memcmd; @@ -90,13 +93,13 @@ void TraceParser::parseFile(MemorySpecification memSpec, std::ifstream& trace, cmd_list.push_back(cmdline); nCommands++; if (nCommands == window) { - counters.getCommands(memSpec, memSpec.memArchSpec.nbrOfBanks, cmd_list, lastupdate); + counters.getCommands(memSpec, cmd_list, lastupdate); nCommands = 0; cmd_list.clear(); } } lastupdate = true; - counters.getCommands(memSpec, memSpec.memArchSpec.nbrOfBanks, cmd_list, lastupdate); + counters.getCommands(memSpec, cmd_list, lastupdate); cmd_list.clear(); pwr_trace.close(); } else { @@ -106,13 +109,13 @@ void TraceParser::parseFile(MemorySpecification memSpec, std::ifstream& trace, cmd_list.push_back(cmdline); nCommands++; if (nCommands == window) { - counters.getCommands(memSpec, memSpec.memArchSpec.nbrOfBanks, cmd_list, lastupdate); + counters.getCommands(memSpec, cmd_list, lastupdate); nCommands = 0; cmd_list.clear(); } } lastupdate = true; - counters.getCommands(memSpec, memSpec.memArchSpec.nbrOfBanks, cmd_list, lastupdate); + counters.getCommands(memSpec, cmd_list, lastupdate); cmd_list.clear(); } counters.clear(); diff --git a/ext/drampower/src/TraceParser.h b/ext/drampower/src/TraceParser.h index cabfcd395..9727b4800 100644 --- a/ext/drampower/src/TraceParser.h +++
b/ext/drampower/src/TraceParser.h @@ -48,6 +48,7 @@ class TraceParser { public: + TraceParser(int64_t nbrOfBanks); // list of parsed commands std::vector<Data::MemCommand> cmd_list; diff --git a/ext/drampower/src/Utils.h b/ext/drampower/src/Utils.h index 4aa8bb220..80f4390c7 100644 --- a/ext/drampower/src/Utils.h +++ b/ext/drampower/src/Utils.h @@ -41,9 +41,7 @@ #include <string> #include <sstream> #include <stdexcept> -#include <typeinfo> -#define MILLION 1000000 template<typename T> T fromString(const std::string& s, @@ -54,7 +52,7 @@ throw(std::runtime_error) T t; if (!(is >> f >> t)) { - throw std::runtime_error("fromString cannot convert " + s); + throw std::runtime_error("Cannot convert string"); } return t; diff --git a/ext/drampower/src/libdrampower/LibDRAMPower.cc b/ext/drampower/src/libdrampower/LibDRAMPower.cc index ac16f948b..47ed15a99 100644 --- a/ext/drampower/src/libdrampower/LibDRAMPower.cc +++ b/ext/drampower/src/libdrampower/LibDRAMPower.cc @@ -52,13 +52,13 @@ libDRAMPower::~libDRAMPower() void libDRAMPower::doCommand(MemCommand::cmds type, int bank, int64_t timestamp) { - MemCommand cmd(type, static_cast<unsigned>(bank), static_cast<double>(timestamp)); + MemCommand cmd(type, static_cast<unsigned>(bank), timestamp); cmdList.push_back(cmd); } void libDRAMPower::updateCounters(bool lastUpdate) { - counters.getCommands(memSpec, memSpec.memArchSpec.nbrOfBanks, cmdList, lastUpdate); + counters.getCommands(memSpec, cmdList, lastUpdate); cmdList.clear(); } @@ -72,6 +72,11 @@ void libDRAMPower::clearState() counters.clear(); } +void libDRAMPower::clearCounters(int64_t timestamp) +{ + counters.clearStats(timestamp); +} + const Data::MemoryPowerModel::Energy& libDRAMPower::getEnergy() const { return mpm.energy; diff --git a/ext/drampower/src/libdrampower/LibDRAMPower.h b/ext/drampower/src/libdrampower/LibDRAMPower.h index 9dea8b0f5..4d9ccefe5 100644 --- a/ext/drampower/src/libdrampower/LibDRAMPower.h +++ b/ext/drampower/src/libdrampower/LibDRAMPower.h @@ -56,6 +56,8 @@ class libDRAMPower { void updateCounters(bool lastUpdate); + void clearCounters(int64_t timestamp); + void clearState(); void calcEnergy(); diff --git a/ext/drampower/test/libdrampowertest/lib_test.cc b/ext/drampower/test/libdrampowertest/lib_test.cc index f382a727e..20d4d9ebf 100644 --- a/ext/drampower/test/libdrampowertest/lib_test.cc +++ b/ext/drampower/test/libdrampowertest/lib_test.cc @@ -79,7 +79,8 @@ int main(int argc, char* argv[]) test.doCommand(MemCommand::RDA,0,210); test.doCommand(MemCommand::ACT,4,232); test.doCommand(MemCommand::WRA,4,247); - test.doCommand(MemCommand::PDN_F_ACT,3,248); + // Need at least tWRAPDEN = AL + CWL + BL/2 + WR + 1 cycles between WR and PDN_F_PRE + test.doCommand(MemCommand::PDN_F_PRE,3,265); //set bool to true when this is the last update of the counters test.updateCounters(true); @@ -106,7 +107,7 @@ int main(int argc, char* argv[]) //test.getEnergy().act_stdby_energy std::cout << "ACT Std Energy" << "\t" << test.getEnergy().act_stdby_energy << endl; //total active standby energy for both ranks - std::cout << "ACT Std Energy total ranks" << "\t" << memSpec.memArchSpec.nbrOfRanks * + std::cout << "ACT Std Energy total ranks" << "\t" << static_cast<double>(memSpec.memArchSpec.nbrOfRanks) * test.getEnergy().act_stdby_energy << "\n" ; std::cout << "PRE Std Energy" << "\t" << test.getEnergy().pre_stdby_energy << endl; std::cout << "Total Energy" << "\t" << test.getEnergy().total_energy << endl; diff --git a/src/mem/SerialLink.py b/src/mem/SerialLink.py index f05f2872d..fd9b0ff6b 
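Taken together, the libDRAMPower changes above (int64_t timestamps, the bank-count constructors for TraceParser/CommandAnalysis, and the new clearCounters() hook) are used roughly as follows. This is a hedged sketch: constructing the Data::MemorySpecification and the second constructor argument of libDRAMPower are assumptions, not part of this patch; the doCommand/updateCounters/calcEnergy/getEnergy/clearCounters calls are the ones declared above.

    #include <iostream>
    #include "libdrampower/LibDRAMPower.h"

    using namespace Data;

    // spec: a MemorySpecification loaded elsewhere (e.g. from one of the
    // library's XML memory descriptions); its construction is not shown here.
    void reportEnergy(const MemorySpecification& spec)
    {
        // Second argument (include I/O and termination power) is an assumption.
        libDRAMPower power(spec, false);

        // Timestamps are now plain int64_t clock cycles (no double conversion).
        power.doCommand(MemCommand::ACT, 0, 35);
        power.doCommand(MemCommand::RDA, 0, 50);
        power.doCommand(MemCommand::ACT, 4, 232);
        power.doCommand(MemCommand::WRA, 4, 247);

        // 'true' marks the last update so trailing cycles are accounted for.
        power.updateCounters(true);
        power.calcEnergy();

        std::cout << "Total energy: " << power.getEnergy().total_energy
                  << " pJ" << std::endl;

        // New in this patch: reset the statistics at a given cycle (e.g. at a
        // periodic stats dump) without wiping the state as clearState() does.
        power.clearCounters(300);
    }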
100644 --- a/src/mem/SerialLink.py +++ b/src/mem/SerialLink.py @@ -61,3 +61,5 @@ class SerialLink(MemObject): # link belongs to and the number of lanes: num_lanes = Param.Unsigned(1, "Number of parallel lanes inside the serial" "link. (aka. lane width)") + link_speed = Param.UInt64(1, "Gb/s Speed of each parallel lane inside the" + "serial link. (aka. lane speed)") diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh index 6cd72b266..f59528492 100644 --- a/src/mem/dram_ctrl.hh +++ b/src/mem/dram_ctrl.hh @@ -41,6 +41,7 @@ * Ani Udipi * Neha Agarwal * Omar Naji + * Matthias Jung */ /** @@ -862,7 +863,7 @@ class DRAMCtrl : public AbstractMemory */ static bool sortTime(const Data::MemCommand& m1, const Data::MemCommand& m2) { - return m1.getTime() < m2.getTime(); + return m1.getTimeInt64() < m2.getTimeInt64(); }; diff --git a/src/mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.py b/src/mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.py index c7833ee96..5a4f3026e 100644 --- a/src/mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.py +++ b/src/mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.py @@ -53,19 +53,20 @@ class GarnetIntLink_d(BasicIntLink): cxx_header = "mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.hh" # The detailed fixed pipeline bi-directional link include two main # forward links and two backward flow-control links, one per direction - nls = [] + _nls = [] # In uni-directional link - nls.append(NetworkLink_d()); + _nls.append(NetworkLink_d()); # Out uni-directional link - nls.append(NetworkLink_d()); - network_links = VectorParam.NetworkLink_d(nls, "forward links") + _nls.append(NetworkLink_d()); + network_links = VectorParam.NetworkLink_d(_nls, "forward links") - cls = [] + _cls = [] # In uni-directional link - cls.append(CreditLink_d()); + _cls.append(CreditLink_d()); # Out uni-directional link - cls.append(CreditLink_d()); - credit_links = VectorParam.CreditLink_d(cls, "backward flow-control links") + _cls.append(CreditLink_d()); + credit_links = VectorParam.CreditLink_d(_cls, + "backward flow-control links") # Exterior fixed pipeline links between a router and a controller class GarnetExtLink_d(BasicExtLink): @@ -73,16 +74,17 @@ class GarnetExtLink_d(BasicExtLink): cxx_header = "mem/ruby/network/garnet/fixed-pipeline/GarnetLink_d.hh" # The detailed fixed pipeline bi-directional link include two main # forward links and two backward flow-control links, one per direction - nls = [] + _nls = [] # In uni-directional link - nls.append(NetworkLink_d()); + _nls.append(NetworkLink_d()); # Out uni-directional link - nls.append(NetworkLink_d()); - network_links = VectorParam.NetworkLink_d(nls, "forward links") + _nls.append(NetworkLink_d()); + network_links = VectorParam.NetworkLink_d(_nls, "forward links") - cls = [] + _cls = [] # In uni-directional link - cls.append(CreditLink_d()); + _cls.append(CreditLink_d()); # Out uni-directional link - cls.append(CreditLink_d()); - credit_links = VectorParam.CreditLink_d(cls, "backward flow-control links") + _cls.append(CreditLink_d()); + credit_links = VectorParam.CreditLink_d(_cls, + "backward flow-control links") diff --git a/src/mem/serial_link.cc b/src/mem/serial_link.cc index b6cb097b7..25f5291bb 100644 --- a/src/mem/serial_link.cc +++ b/src/mem/serial_link.cc @@ -87,7 +87,9 @@ SerialLink::SerialLink(SerialLinkParams *p) ticksToCycles(p->delay), p->resp_size, p->ranges), masterPort(p->name + ".master", *this, slavePort, ticksToCycles(p->delay), p->req_size), - num_lanes(p->num_lanes) + num_lanes(p->num_lanes), + 
link_speed(p->link_speed) + { } @@ -153,8 +155,9 @@ SerialLink::SerialLinkMasterPort::recvTimingResp(PacketPtr pkt) // have to wait to receive the whole packet. So we only account for the // deserialization latency. Cycles cycles = delay; - cycles += Cycles(divCeil(pkt->getSize() * 8, serial_link.num_lanes)); - Tick t = serial_link.clockEdge(cycles); + cycles += Cycles(divCeil(pkt->getSize() * 8, serial_link.num_lanes + * serial_link.link_speed)); + Tick t = serial_link.clockEdge(cycles); //@todo: If the processor sends two uncached requests towards HMC and the // second one is smaller than the first one. It may happen that the second @@ -214,7 +217,7 @@ SerialLink::SerialLinkSlavePort::recvTimingReq(PacketPtr pkt) // only. Cycles cycles = delay; cycles += Cycles(divCeil(pkt->getSize() * 8, - serial_link.num_lanes)); + serial_link.num_lanes * serial_link.link_speed)); Tick t = serial_link.clockEdge(cycles); //@todo: If the processor sends two uncached requests towards HMC @@ -301,7 +304,7 @@ SerialLink::SerialLinkMasterPort::trySendTiming() // Make sure bandwidth limitation is met Cycles cycles = Cycles(divCeil(pkt->getSize() * 8, - serial_link.num_lanes)); + serial_link.num_lanes * serial_link.link_speed)); Tick t = serial_link.clockEdge(cycles); serial_link.schedule(sendEvent, std::max(next_req.tick, t)); } @@ -346,7 +349,7 @@ SerialLink::SerialLinkSlavePort::trySendTiming() // Make sure bandwidth limitation is met Cycles cycles = Cycles(divCeil(pkt->getSize() * 8, - serial_link.num_lanes)); + serial_link.num_lanes * serial_link.link_speed)); Tick t = serial_link.clockEdge(cycles); serial_link.schedule(sendEvent, std::max(next_resp.tick, t)); } diff --git a/src/mem/serial_link.hh b/src/mem/serial_link.hh index d4f6ca488..9fbcce335 100644 --- a/src/mem/serial_link.hh +++ b/src/mem/serial_link.hh @@ -312,6 +312,9 @@ class SerialLink : public MemObject /** Number of parallel lanes in this serial link */ unsigned num_lanes; + /** Speed of each link (Gb/s) in this serial link */ + uint64_t link_speed; + public: virtual BaseMasterPort& getMasterPort(const std::string& if_name, diff --git a/tests/quick/se/70.tgen/traffic.cfg b/tests/quick/se/70.tgen/traffic.cfg new file mode 100644 index 000000000..88e642553 --- /dev/null +++ b/tests/quick/se/70.tgen/traffic.cfg @@ -0,0 +1,7 @@ +STATE 0 10000 RANDOM 100 0 134217727 256 1000 1000 0 +STATE 1 1000000 TRACE tests/quick/se/70.tgen/tgen-simple-mem.trc 100 +STATE 2 1000 IDLE +INIT 0 +TRANSITION 0 1 1 +TRANSITION 1 2 1 +TRANSITION 2 0 1 diff --git a/util/systemc/main.cc b/util/systemc/main.cc index 75a77853b..c9fbd48a0 100644 --- a/util/systemc/main.cc +++ b/util/systemc/main.cc @@ -74,6 +74,9 @@ #include "sc_module.hh" #include "stats.hh" +// Defining global string variable declared in stats.hh +std::string filename; + void usage(const std::string &prog_name) { @@ -289,7 +292,7 @@ void SimControl::run() std::cerr << "Waiting for " << wait_period << "ps for" " SystemC to catch up to gem5\n"; - wait(sc_core::sc_time(wait_period, sc_core::SC_PS)); + wait(sc_core::sc_time::from_value(wait_period)); } config_manager->loadState(*checkpoint); @@ -383,7 +386,11 @@ sc_main(int argc, char **argv) { SimControl sim_control("gem5", argc, argv); + filename = "m5out/stats-systemc.txt"; + sc_core::sc_start(); + CxxConfig::statsDump(); + return EXIT_SUCCESS; } diff --git a/util/systemc/stats.cc b/util/systemc/stats.cc index ef5d9b5d3..54d149474 100644 --- a/util/systemc/stats.cc +++ b/util/systemc/stats.cc @@ -35,6 +35,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE. * * Authors: Andrew Bardsley + * Matthias Jung + * Abdul Mutaal Ahmad */ /** @@ -45,7 +47,9 @@ * Register with: Stats::registerHandlers(statsReset, statsDump) */ +#include "base/output.hh" #include "base/statistics.hh" +#include "base/stats/text.hh" #include "stats.hh" namespace CxxConfig @@ -56,45 +60,76 @@ void statsPrepare() std::list<Stats::Info *> stats = Stats::statsList(); /* gather_stats -> prepare */ - for (auto i = stats.begin(); i != stats.end(); ++i) - (*i)->prepare(); + for (auto i = stats.begin(); i != stats.end(); ++i){ + Stats::Info *stat = *i; + Stats::VectorInfo *vector = dynamic_cast<Stats::VectorInfo *>(stat); + if (vector){ + (dynamic_cast<Stats::VectorInfo *>(*i))->prepare(); + } + else { + (*i)->prepare(); + } + + } } void statsDump() { - std::cerr << "Stats dump\n"; + bool desc = true; + Stats::Output *output = Stats::initText(filename, desc); Stats::processDumpQueue(); std::list<Stats::Info *> stats = Stats::statsList(); + statsEnable(); statsPrepare(); + output->begin(); /* gather_stats -> convert_value */ for (auto i = stats.begin(); i != stats.end(); ++i) { Stats::Info *stat = *i; - Stats::ScalarInfo *scalar = dynamic_cast<Stats::ScalarInfo *>(stat); + const Stats::ScalarInfo *scalar = dynamic_cast<Stats::ScalarInfo + *>(stat); Stats::VectorInfo *vector = dynamic_cast<Stats::VectorInfo *>(stat); - - if (scalar) { - std::cerr << "SCALAR " << stat->name << ' ' - << scalar->value() << '\n'; - } else if (vector) { - Stats::VResult results = vector->value(); - - unsigned int index = 0; - for (auto e = results.begin(); e != results.end(); ++e) { - std::cerr << "VECTOR " << stat->name << '[' << index - << "] " << (*e) << '\n'; - index++; + const Stats::Vector2dInfo *vector2d = dynamic_cast<Stats::Vector2dInfo + *>(vector); + const Stats::DistInfo *dist = dynamic_cast<Stats::DistInfo *>(stat); + const Stats::VectorDistInfo *vectordist = + dynamic_cast<Stats::VectorDistInfo *>(stat); + const Stats::SparseHistInfo *sparse = + dynamic_cast<Stats::SparseHistInfo *>(stat); + const Stats::InfoProxy <Stats::Vector2d,Stats::Vector2dInfo> *info = + dynamic_cast<Stats::InfoProxy + <Stats::Vector2d,Stats::Vector2dInfo>*>(stat); + + if (vector) { + const Stats::FormulaInfo *formula = dynamic_cast<Stats::FormulaInfo + *>(vector); + if (formula){ + output->visit(*formula); + } else { + const Stats::VectorInfo *vector1 = vector; + output->visit(*vector1); } - std::cerr << "VTOTAL " << stat->name << ' ' - << vector->total() << '\n'; + } else if (vector2d) { + output->visit(*vector2d); + } else if (info){ + output->visit(*info); + } else if (vectordist){ + output->visit(*vectordist); + } else if (dist) { + output->visit(*dist); + } else if (sparse) { + output->visit(*sparse); + } else if (scalar) { + output->visit(*scalar); } else { - std::cerr << "?????? 
" << stat->name << '\n'; + warn("Stat not dumped: %s\n", stat->name); } } + output->end(); } void statsReset() @@ -108,8 +143,17 @@ void statsEnable() { std::list<Stats::Info *> stats = Stats::statsList(); - for (auto i = stats.begin(); i != stats.end(); ++i) - (*i)->enable(); + for (auto i = stats.begin(); i != stats.end(); ++i){ + Stats::Info *stat = *i; + Stats::VectorInfo *vector = dynamic_cast<Stats::VectorInfo *>(stat); + if (vector){ + (dynamic_cast<Stats::VectorInfo *>(*i))->enable(); + } + else { + (*i)->enable(); + } + + } } } diff --git a/util/systemc/stats.hh b/util/systemc/stats.hh index 360cb6293..9dac960ee 100644 --- a/util/systemc/stats.hh +++ b/util/systemc/stats.hh @@ -35,6 +35,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Andrew Bardsley + * Matthias Jung + * Abdul Mutaal Ahmad */ /** @@ -48,6 +50,8 @@ #ifndef __UTIL_CXX_CONFIG_STATS_H__ #define __UTIL_CXX_CONFIG_STATS_H__ +extern std::string filename; + namespace CxxConfig { diff --git a/util/tlm/README b/util/tlm/README index 126705296..fc620f145 100644 --- a/util/tlm/README +++ b/util/tlm/README @@ -94,3 +94,26 @@ The parameter -o specifies the begining of the memory region (0x80000000). The system should boot now. For conveniance a run_gem5.sh file holds all those commands + + +III. Elastic Trace Setup +======================== + +Elastic traces can also be replayed into the SystemC world. +For more information on elastic traces please refer to: + + - http://www.gem5.org/TraceCPU + + - Exploring System Performance using Elastic Traces: + Fast, Accurate and Portable + R. Jagtap, S. Diestelhorst, A. Hansson, M. Jung, N. Wehn. + IEEE International Conference on Embedded Computer Systems Architectures + Modeling and Simulation (SAMOS), July, 2016, Samos Island, Greece. + +Similar to I. the simulation can be set up with this command: + +> ../../build/ARM/gem5.opt ./tlm_elastic.py + +Then: + +> ./gem5.opt.sc m5out/config.ini diff --git a/util/tlm/main.cc b/util/tlm/main.cc index c06565603..bf442e02b 100644 --- a/util/tlm/main.cc +++ b/util/tlm/main.cc @@ -30,6 +30,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Matthias Jung + * Abdul Mutaal Ahmad */ /** @@ -67,6 +68,9 @@ #include "sim/system.hh" #include "stats.hh" +// Defining global string variable decalred in stats.hh +std::string filename; + void usage(const std::string &prog_name) { std::cerr << "Usage: " << prog_name << ( @@ -296,6 +300,8 @@ sc_main(int argc, char **argv) SimControl sim_control("gem5", argc, argv); Target *memory; + filename = "m5out/stats-tlm.txt"; + tlm::tlm_initiator_socket <> *mem_port = dynamic_cast<tlm::tlm_initiator_socket<> *>( sc_core::sc_find_object("gem5.memory") @@ -319,5 +325,7 @@ sc_main(int argc, char **argv) SC_REPORT_INFO("sc_main", "End of Simulation"); + CxxConfig::statsDump(); + return EXIT_SUCCESS; } diff --git a/util/tlm/tlm_elastic.py b/util/tlm/tlm_elastic.py new file mode 100644 index 000000000..3de0670c0 --- /dev/null +++ b/util/tlm/tlm_elastic.py @@ -0,0 +1,123 @@ +# Copyright (c) 2016, University of Kaiserslautern +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Matthias Jung + +import m5 +import optparse + +from m5.objects import * +from m5.util import addToPath, fatal + +addToPath('../../configs/common/') + +from Caches import * + +# This configuration shows a simple setup of a Elastic Trace Player (eTraceCPU) +# and an external TLM port for SystemC co-simulation. +# +# We assume a DRAM size of 512MB and L1 cache sizes of 32KB. +# +# Base System Architecture: +# +# +-----------+ ^ +# +-------------+ | eTraceCPU | | +# | System Port | +-----+-----+ | +# +------+------+ | $D1 | $I1 | | +# | +--+--+--+--+ | +# | | | | gem5 World +# | | | | (see this file) +# | | | | +# +------v------------v-----v--+ | +# | Membus | v +# +----------------+-----------+ External Port (see sc_port.*) +# | ^ +# +---v---+ | TLM World +# | TLM | | (see sc_target.*) +# +-------+ v +# +# +# Create a system with a Crossbar and an Elastic Trace Player as CPU: + +# Setup System: +system = System(cpu=TraceCPU(cpu_id=0), + mem_mode='timing', + mem_ranges = [AddrRange('512MB')], + cache_line_size = 64) + +# Create a top-level voltage domain: +system.voltage_domain = VoltageDomain() + +# Create a source clock for the system. This is used as the clock period for +# xbar and memory: +system.clk_domain = SrcClockDomain(clock = '1GHz', + voltage_domain = system.voltage_domain) + +# Create a CPU voltage domain: +system.cpu_voltage_domain = VoltageDomain() + +# Create a separate clock domain for the CPUs. 
In the case of Trace CPUs, this clock +# is actually used only by the caches connected to the CPU: +system.cpu_clk_domain = SrcClockDomain(clock = '1GHz', + voltage_domain = system.cpu_voltage_domain) + +# Setup CPU and its L1 caches: +system.cpu.createInterruptController() +system.cpu.icache = L1_ICache(size="32kB") +system.cpu.dcache = L1_DCache(size="32kB") +system.cpu.icache.cpu_side = system.cpu.icache_port +system.cpu.dcache.cpu_side = system.cpu.dcache_port + +# Assign input trace files to the eTraceCPU: +system.cpu.instTraceFile="system.cpu.traceListener.inst.gz" +system.cpu.dataTraceFile="system.cpu.traceListener.data.gz" + +# Setting up L1 BUS: +system.membus = IOXBar(width = 16) +system.physmem = SimpleMemory() # This must be instantiated, even if not needed + +# Create an external TLM port: +system.tlm = ExternalSlave() +system.tlm.addr_ranges = [AddrRange('512MB')] +system.tlm.port_type = "tlm" +system.tlm.port_data = "memory" + +# Connect everything: +system.membus = SystemXBar() +system.system_port = system.membus.slave +system.cpu.icache.mem_side = system.membus.slave +system.cpu.dcache.mem_side = system.membus.slave +system.membus.master = system.tlm.port + +# Start the simulation: +root = Root(full_system = False, system = system) +root.system.mem_mode = 'timing' +m5.instantiate() +m5.simulate() # Simulation time specified later on the command line
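A quick sanity check of the new serial link bandwidth model: with this patch, serial_link.cc charges divCeil(pkt->getSize() * 8, num_lanes * link_speed) cycles of serialization delay per packet, where link_speed is the new per-lane speed in Gb/s added to SerialLink.py (default 1, which keeps the old divisor of num_lanes). A minimal sketch of that arithmetic, assuming purely illustrative values (16 lanes, 10 Gb/s per lane, one 64-byte packet) that are not taken from this patch:

def div_ceil(a, b):
    # Same rounding as gem5's divCeil().
    return (a + b - 1) // b

# Assumed example values, for illustration only:
num_lanes = 16        # SerialLink.num_lanes
link_speed = 10       # SerialLink.link_speed, Gb/s per lane
pkt_size_bytes = 64   # one cache-line sized packet

# Mirrors: Cycles(divCeil(pkt->getSize() * 8, num_lanes * link_speed))
cycles = div_ceil(pkt_size_bytes * 8, num_lanes * link_speed)
print(cycles)  # 4 cycles of serialization delay on top of the link controller delay

With the default link_speed of 1, the divisor reduces to num_lanes and existing configurations keep their previous serialization timing.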