From 173a7869219534de5053889a84e1006281ec7645 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Mon, 20 Jul 2015 09:15:18 -0500 Subject: ruby: more flexible ruby tester support This patch allows the ruby random tester to use ruby ports that may only support instr or data requests. This patch is similar to a previous changeset (8932:1b2c17565ac8) that was unfortunately broken by subsequent changesets. This current patch implements the support in a more straight-forward way. Since retries are now tested when running the ruby random tester, this patch splits up the retry and drain check behavior so that RubyPort children, such as the GPUCoalescer, can perform those operations correctly without having to duplicate code. Finally, the patch also includes better DPRINTFs for debugging the tester. --- configs/example/ruby_random_test.py | 13 ++++-- configs/ruby/MESI_Three_Level.py | 82 ++++++++++++++++++++++--------------- configs/ruby/MESI_Two_Level.py | 48 +++++++++++++--------- configs/ruby/MI_example.py | 37 ++++++++++------- configs/ruby/MOESI_CMP_directory.py | 37 ++++++++++------- configs/ruby/MOESI_CMP_token.py | 54 +++++++++++++----------- configs/ruby/MOESI_hammer.py | 40 ++++++++++-------- 7 files changed, 185 insertions(+), 126 deletions(-) (limited to 'configs') diff --git a/configs/example/ruby_random_test.py b/configs/example/ruby_random_test.py index 225b3d23b..10d4318c7 100644 --- a/configs/example/ruby_random_test.py +++ b/configs/example/ruby_random_test.py @@ -125,10 +125,15 @@ for ruby_port in system.ruby._cpu_ports: # # Tie the ruby tester ports to the ruby cpu read and write ports # - if ruby_port.support_data_reqs: - tester.cpuDataPort = ruby_port.slave - if ruby_port.support_inst_reqs: - tester.cpuInstPort = ruby_port.slave + if ruby_port.support_data_reqs and ruby_port.support_inst_reqs: + tester.cpuInstDataPort = ruby_port.slave + elif ruby_port.support_data_reqs: + tester.cpuDataPort = ruby_port.slave + elif ruby_port.support_inst_reqs: + tester.cpuInstPort = ruby_port.slave + + # Do not automatically retry stalled Ruby requests + ruby_port.no_retry_on_stall = True # # Tell each sequencer this is the ruby tester so that it diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py index 9db9d27cd..1d4b6ebf6 100644 --- a/configs/ruby/MESI_Three_Level.py +++ b/configs/ruby/MESI_Three_Level.py @@ -1,5 +1,5 @@ # Copyright (c) 2006-2007 The Regents of The University of Michigan -# Copyright (c) 2009 Advanced Micro Devices, Inc. +# Copyright (c) 2009,2015 Advanced Micro Devices, Inc. # Copyright (c) 2013 Mark D. Hill and David A. Wood # All rights reserved. # @@ -44,22 +44,24 @@ class L1Cache(RubyCache): pass class L2Cache(RubyCache): pass def define_options(parser): - parser.add_option("--num-clusters", type="int", default=1, - help="number of clusters in a design in which there are shared\ + parser.add_option("--num-clusters", type = "int", default = 1, + help = "number of clusters in a design in which there are shared\ caches private to clusters") return def create_system(options, full_system, system, dma_ports, ruby_system): if buildEnv['PROTOCOL'] != 'MESI_Three_Level': - fatal("This script requires the MESI_Three_Level protocol to be built.") + fatal("This script requires the MESI_Three_Level protocol to be\ + built.") cpu_sequencers = [] # # The ruby network creation expects the list of nodes in the system to be - # consistent with the NetDest list. Therefore the l1 controller nodes must be - # listed before the directory nodes and directory nodes before dma nodes, etc. + # consistent with the NetDest list. Therefore the l1 controller nodes + # must be listed before the directory nodes and directory nodes before + # dma nodes, etc. # l0_cntrl_nodes = [] l1_cntrl_nodes = [] @@ -94,30 +96,45 @@ def create_system(options, full_system, system, dma_ports, ruby_system): start_index_bit = block_size_bits, replacement_policy = LRUReplacementPolicy()) - l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j, - Icache = l0i_cache, Dcache = l0d_cache, - send_evictions = send_evicts(options), - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + l0_cntrl = L0Cache_Controller( + version = i * num_cpus_per_cluster + j, Icache = l0i_cache, + Dcache = l0d_cache, send_evictions = send_evicts(options), + clk_domain = clk_domain, ruby_system = ruby_system) cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j, - icache = l0i_cache, - clk_domain=system.cpu[i].clk_domain, - dcache = l0d_cache, ruby_system = ruby_system) + icache = l0i_cache, + clk_domain = clk_domain, + dcache = l0d_cache, + ruby_system = ruby_system) l0_cntrl.sequencer = cpu_seq - l1_cache = L1Cache(size = options.l1d_size, assoc = options.l1d_assoc, - start_index_bit = block_size_bits, is_icache = False) + l1_cache = L1Cache(size = options.l1d_size, + assoc = options.l1d_assoc, + start_index_bit = block_size_bits, + is_icache = False) - l1_cntrl = L1Cache_Controller(version = i*num_cpus_per_cluster+j, - cache = l1_cache, l2_select_num_bits = l2_bits, - cluster_id = i, ruby_system = ruby_system) + l1_cntrl = L1Cache_Controller( + version = i * num_cpus_per_cluster + j, + cache = l1_cache, l2_select_num_bits = l2_bits, + cluster_id = i, ruby_system = ruby_system) - exec("ruby_system.l0_cntrl%d = l0_cntrl" % ( - i*num_cpus_per_cluster+j)) - exec("ruby_system.l1_cntrl%d = l1_cntrl" % ( - i*num_cpus_per_cluster+j)) + exec("ruby_system.l0_cntrl%d = l0_cntrl" + % ( i * num_cpus_per_cluster + j)) + exec("ruby_system.l1_cntrl%d = l1_cntrl" + % ( i * num_cpus_per_cluster + j)) # # Add controllers and sequencers to the appropriate lists @@ -155,11 +172,11 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l2_cntrl = L2Cache_Controller( version = i * num_l2caches_per_cluster + j, L2cache = l2_cache, cluster_id = i, - transitions_per_cycle=options.ports, + transitions_per_cycle = options.ports, ruby_system = ruby_system) - exec("ruby_system.l2_cntrl%d = l2_cntrl" % ( - i * num_l2caches_per_cluster + j)) + exec("ruby_system.l2_cntrl%d = l2_cntrl" + % (i * num_l2caches_per_cluster + j)) l2_cntrl_nodes.append(l2_cntrl) # Connect the L2 controllers and the network @@ -185,8 +202,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # the ruby system # clk_divider value is a fix to pass regression. ruby_system.memctrl_clk_domain = DerivedClockDomain( - clk_domain=ruby_system.clk_domain, - clk_divider=3) + clk_domain = ruby_system.clk_domain, clk_divider = 3) for i in xrange(options.num_dirs): # @@ -196,10 +212,9 @@ def create_system(options, full_system, system, dma_ports, ruby_system): dir_size.value = mem_module_size dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + directory = RubyDirectoryMemory(version = i, size = dir_size), + transitions_per_cycle = options.ports, + ruby_system = ruby_system) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) @@ -217,8 +232,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # # Create the Ruby objects associated with the dma controller # - dma_seq = DMASequencer(version = i, - ruby_system = ruby_system) + dma_seq = DMASequencer(version = i, ruby_system = ruby_system) dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py index 9997a117b..4cfa54bd8 100644 --- a/configs/ruby/MESI_Two_Level.py +++ b/configs/ruby/MESI_Two_Level.py @@ -82,23 +82,33 @@ def create_system(options, full_system, system, dma_ports, ruby_system): prefetcher = RubyPrefetcher.Prefetcher() - l1_cntrl = L1Cache_Controller(version = i, - L1Icache = l1i_cache, + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + l1_cntrl = L1Cache_Controller(version = i, L1Icache = l1i_cache, L1Dcache = l1d_cache, l2_select_num_bits = l2_bits, send_evictions = send_evicts(options), prefetcher = prefetcher, ruby_system = ruby_system, - clk_domain=system.cpu[i].clk_domain, - transitions_per_cycle=options.ports, + clk_domain = clk_domain, + transitions_per_cycle = options.ports, enable_prefetch = False) - cpu_seq = RubySequencer(version = i, - icache = l1i_cache, - dcache = l1d_cache, - clk_domain=system.cpu[i].clk_domain, + cpu_seq = RubySequencer(version = i, icache = l1i_cache, + dcache = l1d_cache, clk_domain = clk_domain, ruby_system = ruby_system) + l1_cntrl.sequencer = cpu_seq exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) @@ -135,7 +145,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l2_cntrl = L2Cache_Controller(version = i, L2cache = l2_cache, - transitions_per_cycle=options.ports, + transitions_per_cycle = options.ports, ruby_system = ruby_system) exec("ruby_system.l2_cntrl%d = l2_cntrl" % i) @@ -166,18 +176,17 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # the ruby system # clk_divider value is a fix to pass regression. ruby_system.memctrl_clk_domain = DerivedClockDomain( - clk_domain=ruby_system.clk_domain, - clk_divider=3) + clk_domain = ruby_system.clk_domain, + clk_divider = 3) for i in xrange(options.num_dirs): dir_size = MemorySize('0B') dir_size.value = mem_module_size dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + directory = RubyDirectoryMemory(version = i, size = dir_size), + transitions_per_cycle = options.ports, + ruby_system = ruby_system) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) @@ -194,12 +203,10 @@ def create_system(options, full_system, system, dma_ports, ruby_system): for i, dma_port in enumerate(dma_ports): # Create the Ruby objects associated with the dma controller - dma_seq = DMASequencer(version = i, - ruby_system = ruby_system, + dma_seq = DMASequencer(version = i, ruby_system = ruby_system, slave = dma_port) - dma_cntrl = DMA_Controller(version = i, - dma_sequencer = dma_seq, + dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, transitions_per_cycle = options.ports, ruby_system = ruby_system) @@ -220,7 +227,8 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # Create the io controller and the sequencer if full_system: - io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system) + io_seq = DMASequencer(version = len(dma_ports), + ruby_system = ruby_system) ruby_system._io_port = io_seq io_controller = DMA_Controller(version = len(dma_ports), dma_sequencer = io_seq, diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py index 6f28c6ade..24b0f9716 100644 --- a/configs/ruby/MI_example.py +++ b/configs/ruby/MI_example.py @@ -74,21 +74,28 @@ def create_system(options, full_system, system, dma_ports, ruby_system): assoc = options.l1d_assoc, start_index_bit = block_size_bits) - # - # Only one unified L1 cache exists. Can cache instructions and data. - # - l1_cntrl = L1Cache_Controller(version = i, - cacheMemory = cache, - send_evictions = send_evicts(options), - transitions_per_cycle = options.ports, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) - - cpu_seq = RubySequencer(version = i, - icache = cache, - dcache = cache, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) + + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + # Only one unified L1 cache exists. Can cache instructions and data. + l1_cntrl = L1Cache_Controller(version=i, cacheMemory=cache, + send_evictions=send_evicts(options), + transitions_per_cycle=options.ports, + clk_domain=clk_domain, + ruby_system=ruby_system) + + cpu_seq = RubySequencer(version=i, icache=cache, dcache=cache, + clk_domain=clk_domain, ruby_system=ruby_system) l1_cntrl.sequencer = cpu_seq exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py index cdf8688f0..a72b5b20e 100644 --- a/configs/ruby/MOESI_CMP_directory.py +++ b/configs/ruby/MOESI_CMP_directory.py @@ -80,20 +80,29 @@ def create_system(options, full_system, system, dma_ports, ruby_system): start_index_bit = block_size_bits, is_icache = False) - l1_cntrl = L1Cache_Controller(version = i, - L1Icache = l1i_cache, - L1Dcache = l1d_cache, - l2_select_num_bits = l2_bits, - send_evictions = send_evicts(options), - transitions_per_cycle = options.ports, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) - - cpu_seq = RubySequencer(version = i, - icache = l1i_cache, - dcache = l1d_cache, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache, + L1Dcache=l1d_cache, + l2_select_num_bits=l2_bits, + send_evictions=send_evicts(options), + transitions_per_cycle=options.ports, + clk_domain=clk_domain, + ruby_system=ruby_system) + + cpu_seq = RubySequencer(version=i, icache=l1i_cache, + dcache=l1d_cache, clk_domain=clk_domain, + ruby_system=ruby_system) l1_cntrl.sequencer = cpu_seq exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py index 23c6d9fef..7161544b7 100644 --- a/configs/ruby/MOESI_CMP_token.py +++ b/configs/ruby/MOESI_CMP_token.py @@ -91,29 +91,37 @@ def create_system(options, full_system, system, dma_ports, ruby_system): assoc = options.l1d_assoc, start_index_bit = block_size_bits) - l1_cntrl = L1Cache_Controller(version = i, - L1Icache = l1i_cache, - L1Dcache = l1d_cache, - l2_select_num_bits = l2_bits, - N_tokens = n_tokens, - retry_threshold = \ - options.l1_retries, - fixed_timeout_latency = \ - options.timeout_latency, - dynamic_timeout_enabled = \ - not options.disable_dyn_timeouts, - no_mig_atomic = not \ - options.allow_atomic_migration, - send_evictions = send_evicts(options), - transitions_per_cycle = options.ports, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) - - cpu_seq = RubySequencer(version = i, - icache = l1i_cache, - dcache = l1d_cache, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache, + L1Dcache=l1d_cache, + l2_select_num_bits=l2_bits, + N_tokens=n_tokens, + retry_threshold=options.l1_retries, + fixed_timeout_latency=\ + options.timeout_latency, + dynamic_timeout_enabled=\ + not options.disable_dyn_timeouts, + no_mig_atomic=not \ + options.allow_atomic_migration, + send_evictions=send_evicts(options), + transitions_per_cycle=options.ports, + clk_domain=clk_domain, + ruby_system=ruby_system) + + cpu_seq = RubySequencer(version=i, icache=l1i_cache, + dcache=l1d_cache, clk_domain=clk_domain, + ruby_system=ruby_system) l1_cntrl.sequencer = cpu_seq exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py index 0860fb682..6a1cfd70b 100644 --- a/configs/ruby/MOESI_hammer.py +++ b/configs/ruby/MOESI_hammer.py @@ -89,22 +89,30 @@ def create_system(options, full_system, system, dma_ports, ruby_system): assoc = options.l2_assoc, start_index_bit = block_size_bits) - l1_cntrl = L1Cache_Controller(version = i, - L1Icache = l1i_cache, - L1Dcache = l1d_cache, - L2cache = l2_cache, - no_mig_atomic = not \ - options.allow_atomic_migration, - send_evictions = send_evicts(options), - transitions_per_cycle = options.ports, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) - - cpu_seq = RubySequencer(version = i, - icache = l1i_cache, - dcache = l1d_cache, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache, + L1Dcache=l1d_cache, L2cache=l2_cache, + no_mig_atomic=not \ + options.allow_atomic_migration, + send_evictions=send_evicts(options), + transitions_per_cycle=options.ports, + clk_domain=clk_domain, + ruby_system=ruby_system) + + cpu_seq = RubySequencer(version=i, icache=l1i_cache, + dcache=l1d_cache,clk_domain=clk_domain, + ruby_system=ruby_system) l1_cntrl.sequencer = cpu_seq if options.recycle_latency: -- cgit v1.2.3