diff options
author | Éder F. Zulian <zulian@eit.uni-kl.de> | 2017-11-22 17:03:29 +0100 |
---|---|---|
committer | Éder F. Zulian <zulian@eit.uni-kl.de> | 2017-12-05 08:42:59 +0000 |
commit | 66909dd5a2cf4cec0732eeeb389e3ff739fe58ed (patch) | |
tree | 7a83d029b1a33ad92b9af4ffaaa73ecaa254ed89 | |
parent | 43a1ea88b89aa3611d3b73cc0d4dc5de3f1f5e6b (diff) | |
download | gem5-66909dd5a2cf4cec0732eeeb389e3ff739fe58ed.tar.xz |
config, mem, hmc: fix HMC test script
This patch keeps the logic behind the HMC model implementation untouched.
Additional changes:
- simple hello world script using HMC (SE simulation)
Usage examples:
./build/ARM/gem5.opt configs/example/hmctest.py
./build/ARM/gem5.opt configs/example/hmctest.py --enable-global-monitor --enable-link-monitor --arch=same
./build/ARM/gem5.opt configs/example/hmctest.py --enable-global-monitor --enable-link-monitor --arch=mixed
./build/ARM/gem5.opt configs/example/hmc_hello.py
./build/ARM/gem5.opt configs/example/hmc_hello.py --enable-global-monitor --enable-link-monitor
Change-Id: I64eb6c9abb45376b6ed72722926acddd50765394
Reviewed-on: https://gem5-review.googlesource.com/6061
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
-rw-r--r-- | configs/common/HMC.py | 368 | ||||
-rw-r--r-- | configs/common/MemConfig.py | 7 | ||||
-rw-r--r-- | configs/example/hmc_hello.py | 83 | ||||
-rw-r--r-- | configs/example/hmc_tgen.cfg | 36 | ||||
-rw-r--r-- | configs/example/hmctest.py | 248 |
5 files changed, 433 insertions, 309 deletions
diff --git a/configs/common/HMC.py b/configs/common/HMC.py index e43cbddfc..10d8a7185 100644 --- a/configs/common/HMC.py +++ b/configs/common/HMC.py @@ -122,235 +122,299 @@ # 2 Crossbars are connected to only local vaults. From other 2 crossbar, a # request can be forwarded to any other vault. -import optparse +import argparse import m5 from m5.objects import * +from m5.util import * -# A single Hybrid Memory Cube (HMC) -class HMCSystem(SubSystem): - #*****************************CROSSBAR PARAMETERS************************* + +def add_options(parser): + # *****************************CROSSBAR PARAMETERS************************* # Flit size of the main interconnect [1] - xbar_width = Param.Unsigned(32, "Data width of the main XBar (Bytes)") + parser.add_argument("--xbar-width", default=32, action="store", type=int, + help="Data width of the main XBar (Bytes)") # Clock frequency of the main interconnect [1] # This crossbar, is placed on the logic-based of the HMC and it has its # own voltage and clock domains, different from the DRAM dies or from the # host. 
- xbar_frequency = Param.Frequency('1GHz', "Clock Frequency of the main " - "XBar") + parser.add_argument("--xbar-frequency", default='1GHz', type=str, + help="Clock Frequency of the main XBar") # Arbitration latency of the HMC XBar [1] - xbar_frontend_latency = Param.Cycles(1, "Arbitration latency of the XBar") + parser.add_argument("--xbar-frontend-latency", default=1, action="store", + type=int, help="Arbitration latency of the XBar") # Latency to forward a packet via the interconnect [1](two levels of FIFOs # at the input and output of the inteconnect) - xbar_forward_latency = Param.Cycles(2, "Forward latency of the XBar") + parser.add_argument("--xbar-forward-latency", default=2, action="store", + type=int, help="Forward latency of the XBar") # Latency to forward a response via the interconnect [1](two levels of # FIFOs at the input and output of the inteconnect) - xbar_response_latency = Param.Cycles(2, "Response latency of the XBar") + parser.add_argument("--xbar-response-latency", default=2, action="store", + type=int, help="Response latency of the XBar") # number of cross which connects 16 Vaults to serial link[7] - number_mem_crossbar = Param.Unsigned(4, "Number of crossbar in HMC" - ) + parser.add_argument("--number-mem-crossbar", default=4, action="store", + type=int, help="Number of crossbar in HMC") - #*****************************SERIAL LINK PARAMETERS*********************** + # *****************************SERIAL LINK PARAMETERS********************** # Number of serial links controllers [1] - num_links_controllers = Param.Unsigned(4, "Number of serial links") + parser.add_argument("--num-links-controllers", default=4, action="store", + type=int, help="Number of serial links") # Number of packets (not flits) to store at the request side of the serial # link. 
This number should be adjusted to achive required bandwidth - link_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer " - "at the request side of the serial link") + parser.add_argument("--link-buffer-size-req", default=10, action="store", + type=int, help="Number of packets to buffer at the\ + request side of the serial link") # Number of packets (not flits) to store at the response side of the serial # link. This number should be adjusted to achive required bandwidth - link_buffer_size_rsp = Param.Unsigned(10, "Number of packets to buffer " - "at the response side of the serial link") + parser.add_argument("--link-buffer-size-rsp", default=10, action="store", + type=int, help="Number of packets to buffer at the\ + response side of the serial link") # Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus # the PCB trace latency (3ns Estimated based on [5]) - link_latency = Param.Latency('4.6ns', "Latency of the serial links") + parser.add_argument("--link-latency", default='4.6ns', type=str, + help="Latency of the serial links") # Clock frequency of the each serial link(SerDes) [1] - link_frequency = Param.Frequency('10GHz', "Clock Frequency of the serial" - "links") + parser.add_argument("--link-frequency", default='10GHz', type=str, + help="Clock Frequency of the serial links") # Clock frequency of serial link Controller[6] # clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] / # data_path_width * 10^6 # clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz - link_controller_frequency = Param.Frequency('625MHz', - "Clock Frequency of the link controller") + parser.add_argument("--link-controller-frequency", default='625MHz', + type=str, help="Clock Frequency of the link\ + controller") # Latency of the serial link controller to process the packets[1][6] # (ClockDomain = 625 Mhz ) # used here for calculations only - link_ctrl_latency = Param.Cycles(4, "The number of cycles required for the" - "controller to process the packet") + 
parser.add_argument("--link-ctrl-latency", default=4, action="store", + type=int, help="The number of cycles required for the\ + controller to process the packet") # total_ctrl_latency = link_ctrl_latency + link_latency # total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns - total_ctrl_latency = Param.Latency('11ns', "The latency experienced by" - "every packet regardless of size of packet") + parser.add_argument("--total-ctrl-latency", default='11ns', type=str, + help="The latency experienced by every packet\ + regardless of size of packet") # Number of parallel lanes in each serial link [1] - num_lanes_per_link = Param.Unsigned( 16, "Number of lanes per each link") + parser.add_argument("--num-lanes-per-link", default=16, action="store", + type=int, help="Number of lanes per each link") # Number of serial links [1] - num_serial_links = Param.Unsigned(4, "Number of serial links") + parser.add_argument("--num-serial-links", default=4, action="store", + type=int, help="Number of serial links") # speed of each lane of serial link - SerDes serial interface 10 Gb/s - serial_link_speed = Param.UInt64(10, "Gbs/s speed of each lane of" - "serial link") + parser.add_argument("--serial-link-speed", default=10, action="store", + type=int, help="Gbs/s speed of each lane of serial\ + link") + + # address range for each of the serial links + parser.add_argument("--serial-link-addr-range", default='1GB', type=str, + help="memory range for each of the serial links.\ + Default: 1GB") - #*****************************PERFORMANCE MONITORING************************ + # *****************************PERFORMANCE MONITORING********************* # The main monitor behind the HMC Controller - enable_global_monitor = Param.Bool(False, "The main monitor behind the " - "HMC Controller") + parser.add_argument("--enable-global-monitor", action="store_true", + help="The main monitor behind the HMC Controller") # The link performance monitors - enable_link_monitor = Param.Bool(False, "The link 
monitors" ) + parser.add_argument("--enable-link-monitor", action="store_true", + help="The link monitors") # link aggregator enable - put a cross between buffers & links - enable_link_aggr = Param.Bool(False, "The crossbar between port and " - "Link Controller") + parser.add_argument("--enable-link-aggr", action="store_true", help="The\ + crossbar between port and Link Controller") - enable_buff_div = Param.Bool(True, "Memory Range of Buffer is" - "divided between total range") + parser.add_argument("--enable-buff-div", action="store_true", + help="Memory Range of Buffer is ivided between total\ + range") - #*****************************HMC ARCHITECTURE ************************ + # *****************************HMC ARCHITECTURE ************************** # Memory chunk for 16 vault - numbers of vault / number of crossbars - mem_chunk = Param.Unsigned(4, "Chunk of memory range for each cross bar " - "in arch 0") + parser.add_argument("--mem-chunk", default=4, action="store", type=int, + help="Chunk of memory range for each cross bar in\ + arch 0") # size of req buffer within crossbar, used for modelling extra latency # when the reuqest go to non-local vault - xbar_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer " - "at the request side of the crossbar") + parser.add_argument("--xbar-buffer-size-req", default=10, action="store", + type=int, help="Number of packets to buffer at the\ + request side of the crossbar") # size of response buffer within crossbar, used for modelling extra latency # when the response received from non-local vault - xbar_buffer_size_resp = Param.Unsigned(10, "Number of packets to buffer " - "at the response side of the crossbar") - -# configure host system with Serial Links -def config_host_hmc(options, system): - - system.hmc_host=HMCSystem() - - try: - system.hmc_host.enable_global_monitor = options.enable_global_monitor - except: - pass; - - try: - system.hmc_host.enable_link_monitor = options.enable_link_monitor - except: 
- pass; - - # Serial link Controller with 16 SerDes links at 10 Gbps - # with serial link ranges w.r.t to architecture - system.hmc_host.seriallink = [SerialLink(ranges = options.ser_ranges[i], - req_size=system.hmc_host.link_buffer_size_req, - resp_size=system.hmc_host.link_buffer_size_rsp, - num_lanes=system.hmc_host.num_lanes_per_link, - link_speed=system.hmc_host.serial_link_speed, - delay=system.hmc_host.total_ctrl_latency) - for i in xrange(system.hmc_host.num_serial_links)] + parser.add_argument("--xbar-buffer-size-resp", default=10, action="store", + type=int, help="Number of packets to buffer at the\ + response side of the crossbar") + # HMC device architecture. It affects the HMC host controller as well + parser.add_argument("--arch", type=str, choices=["same", "distributed", + "mixed"], default="distributed", help="same: HMC with\ + 4 links, all with same range.\ndistributed: HMC with\ + 4 links with distributed range.\nmixed: mixed with\ + same and distributed range.\nDefault: distributed") + # HMC device - number of vaults + parser.add_argument("--hmc-dev-num-vaults", default=16, action="store", + type=int, help="number of independent vaults within\ + the HMC device. Note: each vault has a memory\ + controller (valut controller)\nDefault: 16") + # HMC device - vault capacity or size + parser.add_argument("--hmc-dev-vault-size", default='256MB', type=str, + help="vault storage capacity in bytes. Default:\ + 256MB") + parser.add_argument("--mem-type", type=str, choices=["HMC_2500_1x32"], + default="HMC_2500_1x32", help="type of HMC memory to\ + use. Default: HMC_2500_1x32") + parser.add_argument("--mem-channels", default=1, action="store", type=int, + help="Number of memory channels") + parser.add_argument("--mem-ranks", default=1, action="store", type=int, + help="Number of ranks to iterate across") + parser.add_argument("--burst-length", default=256, action="store", + type=int, help="burst length in bytes. 
Note: the\ + cache line size will be set to this value.\nDefault:\ + 256") + + +# configure HMC host controller +def config_hmc_host_ctrl(opt, system): + + # create HMC host controller + system.hmc_host = SubSystem() + + # Create additional crossbar for arch1 + if opt.arch == "distributed" or opt.arch == "mixed": + clk = '100GHz' + vd = VoltageDomain(voltage='1V') + # Create additional crossbar for arch1 + system.membus = NoncoherentXBar(width=8) + system.membus.badaddr_responder = BadAddr() + system.membus.default = Self.badaddr_responder.pio + system.membus.width = 8 + system.membus.frontend_latency = 3 + system.membus.forward_latency = 4 + system.membus.response_latency = 2 + cd = SrcClockDomain(clock=clk, voltage_domain=vd) + system.membus.clk_domain = cd + + # create memory ranges for the serial links + slar = convert.toMemorySize(opt.serial_link_addr_range) + # Memmory ranges of serial link for arch-0. Same as the ranges of vault + # controllers (4 vaults to 1 serial link) + if opt.arch == "same": + ser_ranges = [AddrRange(0, (4*slar)-1) for i in + range(opt.num_serial_links)] + # Memmory ranges of serial link for arch-1. 
Distributed range accross + # links + if opt.arch == "distributed": + ser_ranges = [AddrRange(i*slar, ((i+1)*slar)-1) for i in + range(opt.num_serial_links)] + # Memmory ranges of serial link for arch-2 'Mixed' address distribution + # over links + if opt.arch == "mixed": + ser_range0 = AddrRange(0, (1*slar)-1) + ser_range1 = AddrRange(1*slar, 2*slar-1) + ser_range2 = AddrRange(0, (4*slar)-1) + ser_range3 = AddrRange(0, (4*slar)-1) + ser_ranges = [ser_range0, ser_range1, ser_range2, ser_range3] + + # Serial link Controller with 16 SerDes links at 10 Gbps with serial link + # ranges w.r.t to architecture + sl = [SerialLink(ranges=ser_ranges[i], + req_size=opt.link_buffer_size_req, + resp_size=opt.link_buffer_size_rsp, + num_lanes=opt.num_lanes_per_link, + link_speed=opt.serial_link_speed, + delay=opt.total_ctrl_latency) for i in + xrange(opt.num_serial_links)] + system.hmc_host.seriallink = sl # enable global monitor - if system.hmc_host.enable_global_monitor: - system.hmc_host.lmonitor = [ CommMonitor() - for i in xrange(system.hmc_host.num_serial_links)] + if opt.enable_global_monitor: + system.hmc_host.lmonitor = [CommMonitor() for i in + xrange(opt.num_serial_links)] # set the clock frequency for serial link - for i in xrange(system.hmc_host.num_serial_links): - system.hmc_host.seriallink[i].clk_domain = SrcClockDomain(clock=system. 
- hmc_host.link_controller_frequency, voltage_domain= - VoltageDomain(voltage = '1V')) + for i in xrange(opt.num_serial_links): + clk = opt.link_controller_frequency + vd = VoltageDomain(voltage='1V') + scd = SrcClockDomain(clock=clk, voltage_domain=vd) + system.hmc_host.seriallink[i].clk_domain = scd # Connect membus/traffic gen to Serial Link Controller for differrent HMC # architectures - if options.arch == "distributed": - for i in xrange(system.hmc_host.num_links_controllers): - if system.hmc_host.enable_global_monitor: - system.membus.master = system.hmc_host.lmonitor[i].slave - system.hmc_host.lmonitor[i].master = \ - system.hmc_host.seriallink[i].slave + hh = system.hmc_host + if opt.arch == "distributed": + mb = system.membus + for i in xrange(opt.num_links_controllers): + if opt.enable_global_monitor: + mb.master = hh.lmonitor[i].slave + hh.lmonitor[i].master = hh.seriallink[i].slave else: - system.membus.master = system.hmc_host.seriallink[i].slave - if options.arch == "mixed": - if system.hmc_host.enable_global_monitor: - system.membus.master = system.hmc_host.lmonitor[0].slave - system.hmc_host.lmonitor[0].master = \ - system.hmc_host.seriallink[0].slave - - system.membus.master = system.hmc_host.lmonitor[1].slave - system.hmc_host.lmonitor[1].master = \ - system.hmc_host.seriallink[1].slave - - system.tgen[2].port = system.hmc_host.lmonitor[2].slave - system.hmc_host.lmonitor[2].master = \ - system.hmc_host.seriallink[2].slave - - system.tgen[3].port = system.hmc_host.lmonitor[3].slave - system.hmc_host.lmonitor[3].master = \ - system.hmc_host.seriallink[3].slave + mb.master = hh.seriallink[i].slave + if opt.arch == "mixed": + mb = system.membus + if opt.enable_global_monitor: + mb.master = hh.lmonitor[0].slave + hh.lmonitor[0].master = hh.seriallink[0].slave + mb.master = hh.lmonitor[1].slave + hh.lmonitor[1].master = hh.seriallink[1].slave else: - system.membus.master = system.hmc_host.seriallink[0].slave - system.membus.master = 
system.hmc_host.seriallink[1].slave - system.tgen[2].port = system.hmc_host.seriallink[2].slave - system.tgen[3].port = system.hmc_host.seriallink[3].slave - if options.arch == "same" : - for i in xrange(system.hmc_host.num_links_controllers): - if system.hmc_host.enable_global_monitor: - system.tgen[i].port = system.hmc_host.lmonitor[i].slave - system.hmc_host.lmonitor[i].master = \ - system.hmc_host.seriallink[i].slave - else: - system.tgen[i].port = system.hmc_host.seriallink[i].slave + mb.master = hh.seriallink[0].slave + mb.master = hh.seriallink[1].slave - return system + if opt.arch == "same": + for i in xrange(opt.num_links_controllers): + if opt.enable_global_monitor: + hh.lmonitor[i].master = hh.seriallink[i].slave -# Create an HMC device and attach it to the current system -def config_hmc(options, system, hmc_host): + return system - # Create HMC device - system.hmc_dev = HMCSystem() - # Global monitor - try: - system.hmc_dev.enable_global_monitor = options.enable_global_monitor - except: - pass; +# Create an HMC device +def config_hmc_dev(opt, system, hmc_host): - try: - system.hmc_dev.enable_link_monitor = options.enable_link_monitor - except: - pass; + # create HMC device + system.hmc_dev = SubSystem() + # create memory ranges for the vault controllers + arv = convert.toMemorySize(opt.hmc_dev_vault_size) + addr_ranges_vaults = [AddrRange(i*arv, ((i+1)*arv-1)) for i in + range(opt.hmc_dev_num_vaults)] + system.mem_ranges = addr_ranges_vaults - if system.hmc_dev.enable_link_monitor: - system.hmc_dev.lmonitor = [ CommMonitor() - for i in xrange(system.hmc_dev.num_links_controllers)] + if opt.enable_link_monitor: + lm = [CommMonitor() for i in xrange(opt.num_links_controllers)] + system.hmc_dev.lmonitor = lm # 4 HMC Crossbars located in its logic-base (LoB) - system.hmc_dev.xbar = [ NoncoherentXBar(width=system.hmc_dev.xbar_width, - frontend_latency=system.hmc_dev.xbar_frontend_latency, - forward_latency=system.hmc_dev.xbar_forward_latency, - 
response_latency=system.hmc_dev.xbar_response_latency ) - for i in xrange(system.hmc_host.number_mem_crossbar)] - - for i in xrange(system.hmc_dev.number_mem_crossbar): - system.hmc_dev.xbar[i].clk_domain = SrcClockDomain( - clock=system.hmc_dev.xbar_frequency,voltage_domain= - VoltageDomain(voltage='1V')) + xb = [NoncoherentXBar(width=opt.xbar_width, + frontend_latency=opt.xbar_frontend_latency, + forward_latency=opt.xbar_forward_latency, + response_latency=opt.xbar_response_latency) for i in + xrange(opt.number_mem_crossbar)] + system.hmc_dev.xbar = xb + + for i in xrange(opt.number_mem_crossbar): + clk = opt.xbar_frequency + vd = VoltageDomain(voltage='1V') + scd = SrcClockDomain(clock=clk, voltage_domain=vd) + system.hmc_dev.xbar[i].clk_domain = scd # Attach 4 serial link to 4 crossbar/s - for i in xrange(system.hmc_dev.num_serial_links): - if system.hmc_dev.enable_link_monitor: + for i in xrange(opt.num_serial_links): + if opt.enable_link_monitor: system.hmc_host.seriallink[i].master = \ system.hmc_dev.lmonitor[i].slave system.hmc_dev.lmonitor[i].master = system.hmc_dev.xbar[i].slave @@ -359,14 +423,13 @@ def config_hmc(options, system, hmc_host): # Connecting xbar with each other for request arriving at the wrong xbar, # then it will be forward to correct xbar. 
Bridge is used to connect xbars - if options.arch == "same": + if opt.arch == "same": numx = len(system.hmc_dev.xbar) # create a list of buffers - system.hmc_dev.buffers = [ Bridge( - req_size=system.hmc_dev.xbar_buffer_size_req, - resp_size=system.hmc_dev.xbar_buffer_size_resp) - for i in xrange(numx * (system.hmc_dev.mem_chunk - 1))] + system.hmc_dev.buffers = [Bridge(req_size=opt.xbar_buffer_size_req, + resp_size=opt.xbar_buffer_size_resp) + for i in xrange(numx*(opt.mem_chunk-1))] # Buffer iterator it = iter(range(len(system.hmc_dev.buffers))) @@ -384,8 +447,8 @@ def config_hmc(options, system, hmc_host): # Change the default values for ranges of bridge system.hmc_dev.buffers[index].ranges = system.mem_ranges[ - j * int(system.hmc_dev.mem_chunk): - (j + 1) * int(system.hmc_dev.mem_chunk)] + j * int(opt.mem_chunk): + (j + 1) * int(opt.mem_chunk)] # Connect the bridge between corssbars system.hmc_dev.xbar[i].master = system.hmc_dev.buffers[ @@ -398,8 +461,7 @@ def config_hmc(options, system, hmc_host): # Two crossbars are connected to all other crossbars-Other 2 vault # can only direct traffic to it local vaults - if options.arch == "mixed": - + if opt.arch == "mixed": system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4]) system.hmc_dev.xbar[3].master = system.hmc_dev.buffer30.slave system.hmc_dev.buffer30.master = system.hmc_dev.xbar[0].slave @@ -412,7 +474,6 @@ def config_hmc(options, system, hmc_host): system.hmc_dev.xbar[3].master = system.hmc_dev.buffer32.slave system.hmc_dev.buffer32.master = system.hmc_dev.xbar[2].slave - system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4]) system.hmc_dev.xbar[2].master = system.hmc_dev.buffer20.slave system.hmc_dev.buffer20.master = system.hmc_dev.xbar[0].slave @@ -424,4 +485,3 @@ def config_hmc(options, system, hmc_host): system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16]) system.hmc_dev.xbar[2].master = system.hmc_dev.buffer23.slave system.hmc_dev.buffer23.master = 
system.hmc_dev.xbar[3].slave - diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py index 3605c2144..475bbeb32 100644 --- a/configs/common/MemConfig.py +++ b/configs/common/MemConfig.py @@ -164,8 +164,8 @@ def config_mem(options, system): opt_mem_ranks = getattr(options, "mem_ranks", None) if opt_mem_type == "HMC_2500_1x32": - HMChost = HMC.config_host_hmc(options, system) - HMC.config_hmc(options, system, HMChost.hmc_host) + HMChost = HMC.config_hmc_host_ctrl(options, system) + HMC.config_hmc_dev(options, system, HMChost.hmc_host) subsystem = system.hmc_dev xbar = system.hmc_dev.xbar else: @@ -234,5 +234,8 @@ def config_mem(options, system): for i in xrange(len(subsystem.mem_ctrls)): if opt_mem_type == "HMC_2500_1x32": subsystem.mem_ctrls[i].port = xbar[i/4].master + # Set memory device size. There is an independent controller for + # each vault. All vaults are same size. + subsystem.mem_ctrls[i].device_size = options.hmc_dev_vault_size else: subsystem.mem_ctrls[i].port = xbar.master diff --git a/configs/example/hmc_hello.py b/configs/example/hmc_hello.py new file mode 100644 index 000000000..d9a6c0f9e --- /dev/null +++ b/configs/example/hmc_hello.py @@ -0,0 +1,83 @@ +# Copyright (c) 2017, University of Kaiserslautern +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Author: Éder F. Zulian + +import sys +import argparse + +import m5 +from m5.objects import * +from m5.util import * +addToPath('../') +from common import MemConfig +from common import HMC + + +pd = "Simple 'hello world' example using HMC as main memory" +parser = argparse.ArgumentParser(description=pd) +HMC.add_options(parser) +options = parser.parse_args() +# create the system we are going to simulate +system = System() +# use timing mode for the interaction between master-slave ports +system.mem_mode = 'timing' +# set the clock fequency of the system +clk = '1GHz' +vd = VoltageDomain(voltage='1V') +system.clk_domain = SrcClockDomain(clock=clk, voltage_domain=vd) +# create a simple CPU +system.cpu = TimingSimpleCPU() +# config memory system +MemConfig.config_mem(options, system) +# hook the CPU ports up to the membus +system.cpu.icache_port = system.membus.slave +system.cpu.dcache_port = system.membus.slave +# create the interrupt controller for the CPU and connect to the membus +system.cpu.createInterruptController() +# connect special port in the system to the membus. This port is a +# functional-only port to allow the system to read and write memory. 
+system.system_port = system.membus.slave +# get ISA for the binary to run. +isa = str(m5.defines.buildEnv['TARGET_ISA']).lower() +# run 'hello' and use the compiled ISA to find the binary +binary = 'tests/test-progs/hello/bin/' + isa + '/linux/hello' +# create a process for a simple "Hello World" application +process = Process() +# cmd is a list which begins with the executable (like argv) +process.cmd = [binary] +# set the cpu workload +system.cpu.workload = process +# create thread contexts +system.cpu.createThreads() +# set up the root SimObject +root = Root(full_system=False, system=system) +m5.instantiate() +m5.simulate() diff --git a/configs/example/hmc_tgen.cfg b/configs/example/hmc_tgen.cfg new file mode 100644 index 000000000..1dc0ffe10 --- /dev/null +++ b/configs/example/hmc_tgen.cfg @@ -0,0 +1,36 @@ +# This format supports comments using the '#' symbol as the leading +# character of the line +# +# The file format contains [STATE]+ [INIT] [TRANSITION]+ in any order, +# where the states are the nodes in the graph, init describes what +# state to start in, and transition describes the edges of the graph. +# +# STATE <id> <duration (ticks)> <type> +# +# State IDLE idles +# +# States LINEAR and RANDOM have additional: +# STATE = [LINEAR, RANDOM] +# <id> +# <duration (ticks)> +# <type> +# <percent reads> +# <start addr> +# <end addr> +# <access size (bytes)> +# <min period (ticks)> +# <max period (ticks)> +# <data limit (bytes)> +# +# State TRACE plays back a pre-recorded trace once +# +# Addresses are expressed as decimal numbers, both in the +# configuration and the trace file. The period in the linear and +# random state is from a uniform random distribution over the +# interval. If a specific value is desired, then the min and max can +# be set to the same value. 
+STATE 0 100 IDLE +STATE 1 10000000 LINEAR 100 2147483648 2181038080 64 30000 30000 0 +INIT 0 +TRANSITION 0 1 1 +TRANSITION 1 0 1 diff --git a/configs/example/hmctest.py b/configs/example/hmctest.py index 3e0fa124a..9ed3ef45d 100644 --- a/configs/example/hmctest.py +++ b/configs/example/hmctest.py @@ -1,171 +1,113 @@ -import optparse import sys +import argparse import subprocess +from pprint import pprint import m5 from m5.objects import * -from m5.util import addToPath +from m5.util import * addToPath('../') from common import MemConfig from common import HMC -parser = optparse.OptionParser() -# Use a HMC_2500_1x32 (1 channel, 32-bits wide) by default -parser.add_option("--mem-type", type = "choice", default = "HMC_2500_1x32", - choices = MemConfig.mem_names(), - help = "type of memory to use") - -parser.add_option("--ranks", "-r", type = "int", default = 1, - help = "Number of ranks to iterate across") - -parser.add_option("--rd_perc", type ="int", default=100, - help = "Percentage of read commands") - -parser.add_option("--mode", type ="choice", default ="DRAM", - choices = ["DRAM", "DRAM_ROTATE", "RANDOM"], - help = "DRAM: Random traffic; \ - DRAM_ROTATE: Traffic rotating across banks and ranks" - ) - -parser.add_option("--addr_map", type ="int", default = 1, - help = "0: RoCoRaBaCh; 1: RoRaBaCoCh/RoRaBaChCo") - -parser.add_option("--arch", type = "choice", default = "distributed", - choices = ["same", "distributed", "mixed"], - help = "same: HMC-4 links with same range\ - distributed: HMC-4 links with distributed range\ - mixed: mixed with same & distributed range") - -parser.add_option("--linkaggr", type = "int", default = 0, - help = "1: enable link crossbar, 0: disable link crossbar") - -parser.add_option("--num_cross", type = "int", default = 4, - help = "1: number of crossbar in HMC=1;\ - 4: number of crossbar = 4") - -parser.add_option("--tlm-memory", type = "string", - help="use external port for SystemC TLM cosimulation") - 
-parser.add_option("--elastic-trace-en", action ="store_true", - help = """Enable capture of data dependency and instruction - fetch traces using elastic trace probe.""") - -(options, args) = parser.parse_args() - -if args: - print "Error: script doesn't take any positional arguments" - sys.exit(1) - -system = System() -system.clk_domain = SrcClockDomain(clock='100GHz', - voltage_domain= - VoltageDomain(voltage = '1V')) -# Create additional crossbar for arch1 -if options.arch == "distributed" or options.arch == "mixed" : - system.membus = NoncoherentXBar( width=8 ) - system.membus.badaddr_responder = BadAddr() - system.membus.default = Self.badaddr_responder.pio - system.membus.width = 8 - system.membus.frontend_latency = 3 - system.membus.forward_latency = 4 - system.membus.response_latency = 2 - - system.membus.clk_domain = SrcClockDomain(clock='100GHz', voltage_domain= - VoltageDomain(voltage = '1V')) - -# we are considering 4GB HMC device with following parameters +def add_options(parser): + parser.add_argument("--external-memory-system", default=0, action="store", + type=int, help="External memory system") + # TLM related options, currently optional in configs/common/MemConfig.py + parser.add_argument("--tlm-memory", action="store_true", help="use\ + external port for SystemC TLM co-simulation. Default:\ + no") + # Elastic traces related options, currently optional in + # configs/common/MemConfig.py + parser.add_argument("--elastic-trace-en", action="store_true", + help="enable capture of data dependency and\ + instruction fetch traces using elastic trace\ + probe.\nDefault: no") + # Options related to traffic generation + parser.add_argument("--num-tgen", default=4, action="store", type=int, + choices=[4], help="number of traffic generators.\ + Right now this script supports only 4.\nDefault: 4") + parser.add_argument("--tgen-cfg-file", + default="./configs/example/hmc_tgen.cfg", + type=str, help="Traffic generator(s) configuration\ + file. 
Note: this script uses the same configuration\ + file for all traffic generators") + + +# considering 4GB HMC device with following parameters # hmc_device_size = '4GB' -# hmc_num_vaults = 16 # hmc_vault_size = '256MB' # hmc_stack_size = 8 # hmc_bank_in_stack = 2 # hmc_bank_size = '16MB' # hmc_bank_in_vault = 16 - -# determine the burst length in bytes -burst_size = 256 -num_serial_links = 4 -num_vault_ctrl = 16 -options.mem_channels = 1 -options.external_memory_system = 0 -options.mem_ranks=1 -stride_size = burst_size -system.cache_line_size = burst_size - -# Enable performance monitoring -options.enable_global_monitor = True -options.enable_link_monitor = False - -# Bytes used for calculations -oneGBytes = 1024 * 1024 * 1024 -oneMBytes = 1024 * 1024 - -# Memory ranges of 16 vault controller - Total_HMC_size / 16 -mem_range_vault = [ AddrRange(i * 256 * oneMBytes, ((i + 1) * 256 * oneMBytes) - - 1) - for i in range(num_vault_ctrl)] - -# Memmory ranges of serial link for arch-0 -# Same as the ranges of vault controllers - 4 vault - to - 1 serial link -if options.arch == "same": - ser_range = [ AddrRange(0, (4 * oneGBytes) - 1) - for i in range(num_serial_links)] - options.ser_ranges = ser_range - -# Memmory ranges of serial link for arch-1 -# Distributed range accross links -if options.arch == "distributed": - ser_range = [ AddrRange(i * oneGBytes, ((i + 1) * oneGBytes) - 1) - for i in range(num_serial_links)] - options.ser_ranges = ser_range - -# Memmory ranges of serial link for arch-2 -# "Mixed" address distribution over links -if options.arch == "mixed": - ser_range0 = AddrRange(0 , (1 * oneGBytes) - 1) - ser_range1 = AddrRange(1 * oneGBytes , (2 * oneGBytes) - 1) - ser_range2 = AddrRange(0 , (4 * oneGBytes) - 1) - ser_range3 = AddrRange(0 , (4 * oneGBytes) - 1) - options.ser_ranges = [ser_range0, ser_range1, ser_range2, ser_range3] - -# Assign ranges of vault controller to system ranges -system.mem_ranges = mem_range_vault - -# open traffic generator 
def build_system(options):
    """Build the simulated system: traffic generators driving an HMC memory.

    options -- parsed command-line options; this function reads
               tgen_cfg_file, num_tgen, arch, enable_global_monitor and
               num_links_controllers, and forwards the whole object to
               MemConfig.config_mem().

    Returns the Root SimObject wrapping the configured system.
    """
    # create the system we are going to simulate
    system = System()
    # use timing mode for the interaction between master-slave ports
    system.mem_mode = 'timing'
    # set the clock frequency of the system
    clk = '100GHz'
    vd = VoltageDomain(voltage='1V')
    system.clk_domain = SrcClockDomain(clock=clk, voltage_domain=vd)
    # add traffic generators to the system (all share one configuration file)
    system.tgen = [TrafficGen(config_file=options.tgen_cfg_file)
                   for _ in range(options.num_tgen)]
    # Config memory system with given HMC arch
    # NOTE(review): system.membus and system.hmc_host are used below but not
    # created in this function; presumably config_mem() sets them up --
    # confirm against configs/common/HMC.py.
    MemConfig.config_mem(options, system)
    # Connect the traffic generators
    if options.arch == "distributed":
        # every generator goes through the additional crossbar
        for i in range(options.num_tgen):
            system.tgen[i].port = system.membus.slave
        # connect the system port even if it is not used in this example
        system.system_port = system.membus.slave
    elif options.arch == "mixed":
        # first half of the generators goes through the crossbar ...
        for i in range(options.num_tgen // 2):
            system.tgen[i].port = system.membus.slave
        # ... while generators 2 and 3 are attached to serial links 2 and 3,
        # through the link monitors when monitoring is enabled
        hh = system.hmc_host
        if options.enable_global_monitor:
            system.tgen[2].port = hh.lmonitor[2].slave
            hh.lmonitor[2].master = hh.seriallink[2].slave
            system.tgen[3].port = hh.lmonitor[3].slave
            hh.lmonitor[3].master = hh.seriallink[3].slave
        else:
            system.tgen[2].port = hh.seriallink[2].slave
            system.tgen[3].port = hh.seriallink[3].slave
        # connect the system port even if it is not used in this example
        system.system_port = system.membus.slave
    elif options.arch == "same":
        # one generator per link controller, no crossbar in between
        hh = system.hmc_host
        for i in range(options.num_links_controllers):
            if options.enable_global_monitor:
                system.tgen[i].port = hh.lmonitor[i].slave
            else:
                system.tgen[i].port = hh.seriallink[i].slave
    # set up the root SimObject
    root = Root(full_system=False, system=system)
    return root


def main():
    """Parse the command line, build the system and run the simulation."""
    parser = argparse.ArgumentParser(
        description="Simple system using HMC as main memory")
    # HMC model options first, then the options specific to this script
    HMC.add_options(parser)
    add_options(parser)
    options = parser.parse_args()
    # build the system
    root = build_system(options)
    # instantiate all of the objects we've created so far
    m5.instantiate()
    # single-argument print(...) calls behave the same on Python 2 and 3
    print("Beginning simulation!")
    event = m5.simulate(10000000000)
    m5.stats.dump()
    print('Exiting @ tick %i because %s (exit code is %i)' %
          (m5.curTick(), event.getCause(), event.getCode()))
    print("Done")


# Run only when this file is executed as the gem5 configuration script
# (module name "__m5_main__"), not when it is imported.
if __name__ == "__m5_main__":
    main()