ruby: more flexible ruby tester support

This patch allows the ruby random tester to use ruby ports that may only support instruction or data requests. This patch is similar to a previous changeset (8932:1b2c17565ac8) that was unfortunately broken by subsequent changesets. The current patch implements the support in a more straightforward way. Since retries are now tested when running the ruby random tester, this patch splits up the retry and drain check behavior so that RubyPort children, such as the GPUCoalescer, can perform those operations correctly without having to duplicate code. Finally, the patch also includes better DPRINTFs for debugging the tester.
author: Brad Beckmann <Brad.Beckmann@amd.com> 2015-07-20 09:15:18 -0500
committer: Brad Beckmann <Brad.Beckmann@amd.com> 2015-07-20 09:15:18 -0500
commit: 173a7869219534de5053889a84e1006281ec7645 (patch)
tree: 0cbd44d0669204d4fb3a551e98b1b668f8e6bee4
parent: 4e6241007c514c3f90e9aeebf7cfd92853e45850 (diff)
download: gem5-173a7869219534de5053889a84e1006281ec7645.tar.xz
16 files changed, 326 insertions, 183 deletions
diff --git a/configs/example/ruby_random_test.py b/configs/example/ruby_random_test.py
index 225b3d23b..10d4318c7 100644
--- a/configs/example/ruby_random_test.py
+++ b/configs/example/ruby_random_test.py
@@ -125,10 +125,15 @@ for ruby_port in system.ruby._cpu_ports:
     #
     # Tie the ruby tester ports to the ruby cpu read and write ports
     #
-    if ruby_port.support_data_reqs:
-         tester.cpuDataPort = ruby_port.slave
-    if ruby_port.support_inst_reqs:
-         tester.cpuInstPort = ruby_port.slave
+    if ruby_port.support_data_reqs and ruby_port.support_inst_reqs:
+        tester.cpuInstDataPort = ruby_port.slave
+    elif ruby_port.support_data_reqs:
+        tester.cpuDataPort = ruby_port.slave
+    elif ruby_port.support_inst_reqs:
+        tester.cpuInstPort = ruby_port.slave
+
+    # Do not automatically retry stalled Ruby requests
+    ruby_port.no_retry_on_stall = True
 
     #
     # Tell each sequencer this is the ruby tester so that it
diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py
index 9db9d27cd..1d4b6ebf6 100644
--- a/configs/ruby/MESI_Three_Level.py
+++ b/configs/ruby/MESI_Three_Level.py
@@ -1,5 +1,5 @@
 # Copyright (c) 2006-2007 The Regents of The University of Michigan
-# Copyright (c) 2009 Advanced Micro Devices, Inc.
+# Copyright (c) 2009,2015 Advanced Micro Devices, Inc.
 # Copyright (c) 2013 Mark D. Hill and David A. Wood
 # All rights reserved.
 #
@@ -44,22 +44,24 @@ class L1Cache(RubyCache): pass
 class L2Cache(RubyCache): pass
 
 def define_options(parser):
-    parser.add_option("--num-clusters", type="int", default=1,
-            help="number of clusters in a design in which there are shared\
+    parser.add_option("--num-clusters", type = "int", default = 1,
+            help = "number of clusters in a design in which there are shared\
             caches private to clusters")
     return
 
 def create_system(options, full_system, system, dma_ports, ruby_system):
 
     if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
-        fatal("This script requires the MESI_Three_Level protocol to be built.")
+        fatal("This script requires the MESI_Three_Level protocol to be\
+               built.")
 
     cpu_sequencers = []
 
     #
     # The ruby network creation expects the list of nodes in the system to be
-    # consistent with the NetDest list.  Therefore the l1 controller nodes must be
-    # listed before the directory nodes and directory nodes before dma nodes, etc.
+    # consistent with the NetDest list.  Therefore the l1 controller nodes
+    # must be listed before the directory nodes and directory nodes before
+    # dma nodes, etc.
     #
     l0_cntrl_nodes = []
     l1_cntrl_nodes = []
@@ -94,30 +96,45 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                 start_index_bit = block_size_bits,
                 replacement_policy = LRUReplacementPolicy())
 
-            l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j,
-                          Icache = l0i_cache, Dcache = l0d_cache,
-                          send_evictions = send_evicts(options),
-                          clk_domain=system.cpu[i].clk_domain,
-                          ruby_system = ruby_system)
+            # the ruby random tester reuses num_cpus to specify the
+            # number of cpu ports connected to the tester object, which
+            # is stored in system.cpu. because there is only ever one
+            # tester object, num_cpus is not necessarily equal to the
+            # size of system.cpu; therefore if len(system.cpu) == 1
+            # we use system.cpu[0] to set the clk_domain, thereby ensuring
+            # we don't index off the end of the cpu list.
+            if len(system.cpu) == 1:
+                clk_domain = system.cpu[0].clk_domain
+            else:
+                clk_domain = system.cpu[i].clk_domain
+
+            l0_cntrl = L0Cache_Controller(
+                   version = i * num_cpus_per_cluster + j, Icache = l0i_cache,
+                   Dcache = l0d_cache, send_evictions = send_evicts(options),
+                   clk_domain = clk_domain, ruby_system = ruby_system)
 
             cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j,
-                        icache = l0i_cache,
-                        clk_domain=system.cpu[i].clk_domain,
-                        dcache = l0d_cache, ruby_system = ruby_system)
+                                    icache = l0i_cache,
+                                    clk_domain = clk_domain,
+                                    dcache = l0d_cache,
+                                    ruby_system = ruby_system)
 
             l0_cntrl.sequencer = cpu_seq
 
-            l1_cache = L1Cache(size = options.l1d_size, assoc = options.l1d_assoc,
-                            start_index_bit = block_size_bits, is_icache = False)
+            l1_cache = L1Cache(size = options.l1d_size,
+                               assoc = options.l1d_assoc,
+                               start_index_bit = block_size_bits,
+                               is_icache = False)
 
-            l1_cntrl = L1Cache_Controller(version = i*num_cpus_per_cluster+j,
-                          cache = l1_cache, l2_select_num_bits = l2_bits,
-                          cluster_id = i, ruby_system = ruby_system)
+            l1_cntrl = L1Cache_Controller(
+                    version = i * num_cpus_per_cluster + j,
+                    cache = l1_cache, l2_select_num_bits = l2_bits,
+                    cluster_id = i, ruby_system = ruby_system)
 
-            exec("ruby_system.l0_cntrl%d = l0_cntrl" % (
-                        i*num_cpus_per_cluster+j))
-            exec("ruby_system.l1_cntrl%d = l1_cntrl" % (
-                        i*num_cpus_per_cluster+j))
+            exec("ruby_system.l0_cntrl%d = l0_cntrl"
+                 % ( i * num_cpus_per_cluster + j))
+            exec("ruby_system.l1_cntrl%d = l1_cntrl"
+                 % ( i * num_cpus_per_cluster + j))
 
             #
             # Add controllers and sequencers to the appropriate lists
@@ -155,11 +172,11 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
             l2_cntrl = L2Cache_Controller(
                         version = i * num_l2caches_per_cluster + j,
                         L2cache = l2_cache, cluster_id = i,
-                        transitions_per_cycle=options.ports,
+                        transitions_per_cycle = options.ports,
                         ruby_system = ruby_system)
 
-            exec("ruby_system.l2_cntrl%d = l2_cntrl" % (
-                        i * num_l2caches_per_cluster + j))
+            exec("ruby_system.l2_cntrl%d = l2_cntrl"
+                 % (i * num_l2caches_per_cluster + j))
             l2_cntrl_nodes.append(l2_cntrl)
 
             # Connect the L2 controllers and the network
@@ -185,8 +202,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
     # the ruby system
     # clk_divider value is a fix to pass regression.
     ruby_system.memctrl_clk_domain = DerivedClockDomain(
-                                          clk_domain=ruby_system.clk_domain,
-                                          clk_divider=3)
+            clk_domain = ruby_system.clk_domain, clk_divider = 3)
 
     for i in xrange(options.num_dirs):
         #
@@ -196,10 +212,9 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
         dir_size.value = mem_module_size
 
         dir_cntrl = Directory_Controller(version = i,
-                                         directory = RubyDirectoryMemory(
-                                             version = i, size = dir_size),
-                                         transitions_per_cycle = options.ports,
-                                         ruby_system = ruby_system)
+                directory = RubyDirectoryMemory(version = i, size = dir_size),
+                transitions_per_cycle = options.ports,
+                ruby_system = ruby_system)
 
         exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
         dir_cntrl_nodes.append(dir_cntrl)
@@ -217,8 +232,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
         #
         # Create the Ruby objects associated with the dma controller
         #
-        dma_seq = DMASequencer(version = i,
-                               ruby_system = ruby_system)
+        dma_seq = DMASequencer(version = i, ruby_system = ruby_system)
 
         dma_cntrl = DMA_Controller(version = i,
                                    dma_sequencer = dma_seq,
diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py
index 9997a117b..4cfa54bd8 100644
--- a/configs/ruby/MESI_Two_Level.py
+++ b/configs/ruby/MESI_Two_Level.py
@@ -82,23 +82,33 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
 
         prefetcher = RubyPrefetcher.Prefetcher()
 
-        l1_cntrl = L1Cache_Controller(version = i,
-                                      L1Icache = l1i_cache,
+        # the ruby random tester reuses num_cpus to specify the
+        # number of cpu ports connected to the tester object, which
+        # is stored in system.cpu. because there is only ever one
+        # tester object, num_cpus is not necessarily equal to the
+        # size of system.cpu; therefore if len(system.cpu) == 1
+        # we use system.cpu[0] to set the clk_domain, thereby ensuring
+        # we don't index off the end of the cpu list.
+        if len(system.cpu) == 1:
+            clk_domain = system.cpu[0].clk_domain
+        else:
+            clk_domain = system.cpu[i].clk_domain
+
+        l1_cntrl = L1Cache_Controller(version = i, L1Icache = l1i_cache,
                                       L1Dcache = l1d_cache,
                                       l2_select_num_bits = l2_bits,
                                       send_evictions = send_evicts(options),
                                       prefetcher = prefetcher,
                                       ruby_system = ruby_system,
-                                      clk_domain=system.cpu[i].clk_domain,
-                                      transitions_per_cycle=options.ports,
+                                      clk_domain = clk_domain,
+                                      transitions_per_cycle = options.ports,
                                       enable_prefetch = False)
 
-        cpu_seq = RubySequencer(version = i,
-                                icache = l1i_cache,
-                                dcache = l1d_cache,
-                                clk_domain=system.cpu[i].clk_domain,
+        cpu_seq = RubySequencer(version = i, icache = l1i_cache,
+                                dcache = l1d_cache, clk_domain = clk_domain,
                                 ruby_system = ruby_system)
 
+
         l1_cntrl.sequencer = cpu_seq
         exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
 
@@ -135,7 +145,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
 
         l2_cntrl = L2Cache_Controller(version = i,
                                       L2cache = l2_cache,
-                                      transitions_per_cycle=options.ports,
+                                      transitions_per_cycle = options.ports,
                                       ruby_system = ruby_system)
 
         exec("ruby_system.l2_cntrl%d = l2_cntrl" % i)
@@ -166,18 +176,17 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
     # the ruby system
     # clk_divider value is a fix to pass regression.
     ruby_system.memctrl_clk_domain = DerivedClockDomain(
-                                          clk_domain=ruby_system.clk_domain,
-                                          clk_divider=3)
+                                          clk_domain = ruby_system.clk_domain,
+                                          clk_divider = 3)
 
     for i in xrange(options.num_dirs):
         dir_size = MemorySize('0B')
         dir_size.value = mem_module_size
 
         dir_cntrl = Directory_Controller(version = i,
-                                         directory = RubyDirectoryMemory(
-                                             version = i, size = dir_size),
-                                         transitions_per_cycle = options.ports,
-                                         ruby_system = ruby_system)
+                directory = RubyDirectoryMemory(version = i, size = dir_size),
+                transitions_per_cycle = options.ports,
+                ruby_system = ruby_system)
 
         exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
         dir_cntrl_nodes.append(dir_cntrl)
@@ -194,12 +203,10 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
 
     for i, dma_port in enumerate(dma_ports):
         # Create the Ruby objects associated with the dma controller
-        dma_seq = DMASequencer(version = i,
-                               ruby_system = ruby_system,
+        dma_seq = DMASequencer(version = i, ruby_system = ruby_system,
                                slave = dma_port)
 
-        dma_cntrl = DMA_Controller(version = i,
-                                   dma_sequencer = dma_seq,
+        dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq,
                                    transitions_per_cycle = options.ports,
                                    ruby_system = ruby_system)
 
@@ -220,7 +227,8 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
 
     # Create the io controller and the sequencer
     if full_system:
-        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
+        io_seq = DMASequencer(version = len(dma_ports),
+                              ruby_system = ruby_system)
         ruby_system._io_port = io_seq
         io_controller = DMA_Controller(version = len(dma_ports),
                                        dma_sequencer = io_seq,
diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py
index 6f28c6ade..24b0f9716 100644
--- a/configs/ruby/MI_example.py
+++ b/configs/ruby/MI_example.py
@@ -74,21 +74,28 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                         assoc = options.l1d_assoc,
                         start_index_bit = block_size_bits)
 
-        #
-        # Only one unified L1 cache exists.  Can cache instructions and data.
-        #
-        l1_cntrl = L1Cache_Controller(version = i,
-                                      cacheMemory = cache,
-                                      send_evictions = send_evicts(options),
-                                      transitions_per_cycle = options.ports,
-                                      clk_domain=system.cpu[i].clk_domain,
-                                      ruby_system = ruby_system)
-
-        cpu_seq = RubySequencer(version = i,
-                                icache = cache,
-                                dcache = cache,
-                                clk_domain=system.cpu[i].clk_domain,
-                                ruby_system = ruby_system)
+
+        # the ruby random tester reuses num_cpus to specify the
+        # number of cpu ports connected to the tester object, which
+        # is stored in system.cpu. because there is only ever one
+        # tester object, num_cpus is not necessarily equal to the
+        # size of system.cpu; therefore if len(system.cpu) == 1
+        # we use system.cpu[0] to set the clk_domain, thereby ensuring
+        # we don't index off the end of the cpu list.
+        if len(system.cpu) == 1:
+            clk_domain = system.cpu[0].clk_domain
+        else:
+            clk_domain = system.cpu[i].clk_domain
+
+        # Only one unified L1 cache exists. Can cache instructions and data.
+        l1_cntrl = L1Cache_Controller(version=i, cacheMemory=cache,
+                                      send_evictions=send_evicts(options),
+                                      transitions_per_cycle=options.ports,
+                                      clk_domain=clk_domain,
+                                      ruby_system=ruby_system)
+
+        cpu_seq = RubySequencer(version=i, icache=cache, dcache=cache,
+                                clk_domain=clk_domain, ruby_system=ruby_system)
 
         l1_cntrl.sequencer = cpu_seq
         exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py
index cdf8688f0..a72b5b20e 100644
--- a/configs/ruby/MOESI_CMP_directory.py
+++ b/configs/ruby/MOESI_CMP_directory.py
@@ -80,20 +80,29 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                             start_index_bit = block_size_bits,
                             is_icache = False)
 
-        l1_cntrl = L1Cache_Controller(version = i,
-                                      L1Icache = l1i_cache,
-                                      L1Dcache = l1d_cache,
-                                      l2_select_num_bits = l2_bits,
-                                      send_evictions = send_evicts(options),
-                                      transitions_per_cycle = options.ports,
-                                      clk_domain=system.cpu[i].clk_domain,
-                                      ruby_system = ruby_system)
-
-        cpu_seq = RubySequencer(version = i,
-                                icache = l1i_cache,
-                                dcache = l1d_cache,
-                                clk_domain=system.cpu[i].clk_domain,
-                                ruby_system = ruby_system)
+        # the ruby random tester reuses num_cpus to specify the
+        # number of cpu ports connected to the tester object, which
+        # is stored in system.cpu. because there is only ever one
+        # tester object, num_cpus is not necessarily equal to the
+        # size of system.cpu; therefore if len(system.cpu) == 1
+        # we use system.cpu[0] to set the clk_domain, thereby ensuring
+        # we don't index off the end of the cpu list.
+        if len(system.cpu) == 1:
+            clk_domain = system.cpu[0].clk_domain
+        else:
+            clk_domain = system.cpu[i].clk_domain
+
+        l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
+                                      L1Dcache=l1d_cache,
+                                      l2_select_num_bits=l2_bits,
+                                      send_evictions=send_evicts(options),
+                                      transitions_per_cycle=options.ports,
+                                      clk_domain=clk_domain,
+                                      ruby_system=ruby_system)
+
+        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
+                                dcache=l1d_cache, clk_domain=clk_domain,
+                                ruby_system=ruby_system)
 
         l1_cntrl.sequencer = cpu_seq
         exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py
index 23c6d9fef..7161544b7 100644
--- a/configs/ruby/MOESI_CMP_token.py
+++ b/configs/ruby/MOESI_CMP_token.py
@@ -91,29 +91,37 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                             assoc = options.l1d_assoc,
                             start_index_bit = block_size_bits)
 
-        l1_cntrl = L1Cache_Controller(version = i,
-                                      L1Icache = l1i_cache,
-                                      L1Dcache = l1d_cache,
-                                      l2_select_num_bits = l2_bits,
-                                      N_tokens = n_tokens,
-                                      retry_threshold = \
-                                        options.l1_retries,
-                                      fixed_timeout_latency = \
-                                        options.timeout_latency,
-                                      dynamic_timeout_enabled = \
-                                        not options.disable_dyn_timeouts,
-                                      no_mig_atomic = not \
-                                        options.allow_atomic_migration,
-                                      send_evictions = send_evicts(options),
-                                      transitions_per_cycle = options.ports,
-                                      clk_domain=system.cpu[i].clk_domain,
-                                      ruby_system = ruby_system)
-
-        cpu_seq = RubySequencer(version = i,
-                                icache = l1i_cache,
-                                dcache = l1d_cache,
-                                clk_domain=system.cpu[i].clk_domain,
-                                ruby_system = ruby_system)
+        # the ruby random tester reuses num_cpus to specify the
+        # number of cpu ports connected to the tester object, which
+        # is stored in system.cpu. because there is only ever one
+        # tester object, num_cpus is not necessarily equal to the
+        # size of system.cpu; therefore if len(system.cpu) == 1
+        # we use system.cpu[0] to set the clk_domain, thereby ensuring
+        # we don't index off the end of the cpu list.
+        if len(system.cpu) == 1:
+            clk_domain = system.cpu[0].clk_domain
+        else:
+            clk_domain = system.cpu[i].clk_domain
+
+        l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
+                                      L1Dcache=l1d_cache,
+                                      l2_select_num_bits=l2_bits,
+                                      N_tokens=n_tokens,
+                                      retry_threshold=options.l1_retries,
+                                      fixed_timeout_latency=\
+                                      options.timeout_latency,
+                                      dynamic_timeout_enabled=\
+                                      not options.disable_dyn_timeouts,
+                                      no_mig_atomic=not \
+                                      options.allow_atomic_migration,
+                                      send_evictions=send_evicts(options),
+                                      transitions_per_cycle=options.ports,
+                                      clk_domain=clk_domain,
+                                      ruby_system=ruby_system)
+
+        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
+                                dcache=l1d_cache, clk_domain=clk_domain,
+                                ruby_system=ruby_system)
 
         l1_cntrl.sequencer = cpu_seq
         exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py
index 0860fb682..6a1cfd70b 100644
--- a/configs/ruby/MOESI_hammer.py
+++ b/configs/ruby/MOESI_hammer.py
@@ -89,22 +89,30 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                            assoc = options.l2_assoc,
                            start_index_bit = block_size_bits)
 
-        l1_cntrl = L1Cache_Controller(version = i,
-                                      L1Icache = l1i_cache,
-                                      L1Dcache = l1d_cache,
-                                      L2cache = l2_cache,
-                                      no_mig_atomic = not \
-                                        options.allow_atomic_migration,
-                                      send_evictions = send_evicts(options),
-                                      transitions_per_cycle = options.ports,
-                                      clk_domain=system.cpu[i].clk_domain,
-                                      ruby_system = ruby_system)
-
-        cpu_seq = RubySequencer(version = i,
-                                icache = l1i_cache,
-                                dcache = l1d_cache,
-                                clk_domain=system.cpu[i].clk_domain,
-                                ruby_system = ruby_system)
+        # the ruby random tester reuses num_cpus to specify the
+        # number of cpu ports connected to the tester object, which
+        # is stored in system.cpu. because there is only ever one
+        # tester object, num_cpus is not necessarily equal to the
+        # size of system.cpu; therefore if len(system.cpu) == 1
+        # we use system.cpu[0] to set the clk_domain, thereby ensuring
+        # we don't index off the end of the cpu list.
+        if len(system.cpu) == 1:
+            clk_domain = system.cpu[0].clk_domain
+        else:
+            clk_domain = system.cpu[i].clk_domain
+
+        l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
+                                      L1Dcache=l1d_cache, L2cache=l2_cache,
+                                      no_mig_atomic=not \
+                                      options.allow_atomic_migration,
+                                      send_evictions=send_evicts(options),
+                                      transitions_per_cycle=options.ports,
+                                      clk_domain=clk_domain,
+                                      ruby_system=ruby_system)
+
+        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
+                                dcache=l1d_cache,clk_domain=clk_domain,
+                                ruby_system=ruby_system)
 
         l1_cntrl.sequencer = cpu_seq
         if options.recycle_latency:
diff --git a/src/cpu/testers/rubytest/Check.cc b/src/cpu/testers/rubytest/Check.cc
index 4cdaf9b2f..c8e7816c3 100644
--- a/src/cpu/testers/rubytest/Check.cc
+++ b/src/cpu/testers/rubytest/Check.cc
@@ -94,7 +94,9 @@ Check::initiatePrefetch()
         cmd = MemCmd::ReadReq;
 
         // if necessary, make the request an instruction fetch
-        if (m_tester_ptr->isInstReadableCpuPort(index)) {
+        if (m_tester_ptr->isInstOnlyCpuPort(index) ||
+            (m_tester_ptr->isInstDataCpuPort(index) &&
+             (random_mt.random(0, 0x1)))) {
             flags.set(Request::INST_FETCH);
         }
     } else {
@@ -193,7 +195,7 @@ Check::initiateAction()
     *writeData = m_value + m_store_count;
     pkt->dataDynamic(writeData);
 
-    DPRINTF(RubyTest, "data 0x%x check 0x%x\n",
+    DPRINTF(RubyTest, "Seq write: index %d data 0x%x check 0x%x\n", index,
             *(pkt->getConstPtr<uint8_t>()), *writeData);
 
     // push the subblock onto the sender state.  The sequencer will
@@ -205,6 +207,7 @@ Check::initiateAction()
         DPRINTF(RubyTest, "status before action update: %s\n",
                 (TesterStatus_to_string(m_status)).c_str());
         m_status = TesterStatus_Action_Pending;
+        DPRINTF(RubyTest, "Check %s, State=Action_Pending\n", m_address);
     } else {
         // If the packet did not issue, must delete
         // Note: No need to delete the data, the packet destructor
@@ -232,7 +235,9 @@ Check::initiateCheck()
     Request::Flags flags;
 
     // If necessary, make the request an instruction fetch
-    if (m_tester_ptr->isInstReadableCpuPort(index)) {
+    if (m_tester_ptr->isInstOnlyCpuPort(index) ||
+        (m_tester_ptr->isInstDataCpuPort(index) &&
+         (random_mt.random(0, 0x1)))) {
         flags.set(Request::INST_FETCH);
     }
 
@@ -245,6 +250,8 @@ Check::initiateCheck()
     uint8_t *dataArray = new uint8_t[CHECK_SIZE];
     pkt->dataDynamic(dataArray);
 
+    DPRINTF(RubyTest, "Seq read: index %d\n", index);
+
     // push the subblock onto the sender state.  The sequencer will
     // update the subblock on the return
     pkt->senderState = new SenderState(m_address, req->getSize());
@@ -254,6 +261,7 @@ Check::initiateCheck()
         DPRINTF(RubyTest, "status before check update: %s\n",
                 TesterStatus_to_string(m_status).c_str());
         m_status = TesterStatus_Check_Pending;
+        DPRINTF(RubyTest, "Check %s, State=Check_Pending\n", m_address);
     } else {
         // If the packet did not issue, must delete
         // Note: No need to delete the data, the packet destructor
@@ -291,8 +299,11 @@ Check::performCallback(NodeID proc, SubBlock* data, Cycles curTime)
         m_store_count++;
         if (m_store_count == CHECK_SIZE) {
             m_status = TesterStatus_Ready;
+            DPRINTF(RubyTest, "Check %s, State=Ready\n", m_address);
         } else {
             m_status = TesterStatus_Idle;
+            DPRINTF(RubyTest, "Check %s, State=Idle store_count: %d\n",
+                    m_address, m_store_count);
         }
         DPRINTF(RubyTest, "Action callback return data now %d\n",
                 data->getByte(0));
@@ -316,6 +327,7 @@ Check::performCallback(NodeID proc, SubBlock* data, Cycles curTime)
         m_tester_ptr->incrementCheckCompletions();
 
         m_status = TesterStatus_Idle;
+        DPRINTF(RubyTest, "Check %s, State=Idle\n", m_address);
         pickValue();
 
     } else {
@@ -335,6 +347,7 @@ Check::changeAddress(Addr address)
     assert(m_status == TesterStatus_Idle || m_status == TesterStatus_Ready);
     m_status = TesterStatus_Idle;
     m_address = address;
+    DPRINTF(RubyTest, "Check %s, State=Idle\n", m_address);
     m_store_count = 0;
 }
 
@@ -342,7 +355,6 @@ void
 Check::pickValue()
 {
     assert(m_status == TesterStatus_Idle);
-    m_status = TesterStatus_Idle;
     m_value = random_mt.random(0, 0xff); // One byte
     m_store_count = 0;
 }
@@ -353,7 +365,8 @@ Check::pickInitiatingNode()
     assert(m_status == TesterStatus_Idle || m_status == TesterStatus_Ready);
     m_status = TesterStatus_Idle;
     m_initiatingNode = (random_mt.random(0, m_num_writers - 1));
-    DPRINTF(RubyTest, "picked initiating node %d\n", m_initiatingNode);
+    DPRINTF(RubyTest, "Check %s, State=Idle, picked initiating node %d\n",
+            m_address, m_initiatingNode);
     m_store_count = 0;
 }
 
diff --git a/src/cpu/testers/rubytest/CheckTable.cc b/src/cpu/testers/rubytest/CheckTable.cc
index b75fd0a52..3bdd73f27 100644
--- a/src/cpu/testers/rubytest/CheckTable.cc
+++ b/src/cpu/testers/rubytest/CheckTable.cc
@@ -42,6 +42,7 @@ CheckTable::CheckTable(int _num_writers, int _num_readers, RubyTester* _tester)
     const int size1 = 32;
     const int size2 = 100;
 
+    DPRINTF(RubyTest, "Adding false sharing checks\n");
     // The first set is to get some false sharing
     physical = 1000;
     for (int i = 0; i < size1; i++) {
@@ -50,6 +51,7 @@ CheckTable::CheckTable(int _num_writers, int _num_readers, RubyTester* _tester)
         physical += CHECK_SIZE;
     }
 
+    DPRINTF(RubyTest, "Adding cache conflict checks\n");
     // The next two sets are to get some limited false sharing and
     // cache conflicts
     physical = 1000;
@@ -59,6 +61,7 @@ CheckTable::CheckTable(int _num_writers, int _num_readers, RubyTester* _tester)
         physical += 256;
     }
 
+    DPRINTF(RubyTest, "Adding cache conflict checks2\n");
     physical = 1000 + CHECK_SIZE;
     for (int i = 0; i < size2; i++) {
         // Setup linear addresses
@@ -91,6 +94,8 @@ CheckTable::addCheck(Addr address)
         }
     }
 
+    DPRINTF(RubyTest, "Adding check for address: %s\n", address);
+
     Check* check_ptr = new Check(address, 100 + m_check_vector.size(),
                                  m_num_writers, m_num_readers, m_tester_ptr);
     for (int i = 0; i < CHECK_SIZE; i++) {
@@ -110,7 +115,7 @@ CheckTable::getRandomCheck()
 Check*
 CheckTable::getCheck(const Addr address)
 {
-    DPRINTF(RubyTest, "Looking for check by address: %s", address);
+    DPRINTF(RubyTest, "Looking for check by address: %s\n", address);
 
     auto i = m_lookup_map.find(address);
 
diff --git a/src/cpu/testers/rubytest/RubyTester.cc b/src/cpu/testers/rubytest/RubyTester.cc
index e0f30f552..5ed6d7f66 100644
--- a/src/cpu/testers/rubytest/RubyTester.cc
+++ b/src/cpu/testers/rubytest/RubyTester.cc
@@ -58,7 +58,8 @@ RubyTester::RubyTester(const Params *p)
     m_num_readers(0),
     m_wakeup_frequency(p->wakeup_frequency),
     m_check_flush(p->check_flush),
-    m_num_inst_ports(p->port_cpuInstPort_connection_count)
+    m_num_inst_only_ports(p->port_cpuInstPort_connection_count),
+    m_num_inst_data_ports(p->port_cpuInstDataPort_connection_count)
 {
     m_checks_completed = 0;
 
@@ -73,15 +74,25 @@ RubyTester::RubyTester(const Params *p)
     // Note: the inst ports are the lowest elements of the readPort vector,
     // then the data ports are added to the readPort vector
     //
+    int idx = 0;
     for (int i = 0; i < p->port_cpuInstPort_connection_count; ++i) {
         readPorts.push_back(new CpuPort(csprintf("%s-instPort%d", name(), i),
-                                        this, i));
+                                        this, i, idx));
+        idx++;
+    }
+    for (int i = 0; i < p->port_cpuInstDataPort_connection_count; ++i) {
+        CpuPort *port = new CpuPort(csprintf("%s-instDataPort%d", name(), i),
+                                    this, i, idx);
+        readPorts.push_back(port);
+        writePorts.push_back(port);
+        idx++;
     }
     for (int i = 0; i < p->port_cpuDataPort_connection_count; ++i) {
         CpuPort *port = new CpuPort(csprintf("%s-dataPort%d", name(), i),
-                                    this, i);
+                                    this, i, idx);
         readPorts.push_back(port);
         writePorts.push_back(port);
+        idx++;
     }
 
     // add the check start event to the event queue
@@ -108,6 +119,7 @@ RubyTester::init()
 
     m_num_writers = writePorts.size();
     m_num_readers = readPorts.size();
+    assert(m_num_readers == m_num_cpus);
 
     m_checkTable_ptr = new CheckTable(m_num_writers, m_num_readers, this);
 }
@@ -115,32 +127,45 @@ RubyTester::init()
 BaseMasterPort &
 RubyTester::getMasterPort(const std::string &if_name, PortID idx)
 {
-    if (if_name != "cpuInstPort" && if_name != "cpuDataPort") {
+    if (if_name != "cpuInstPort" && if_name != "cpuInstDataPort" &&
+        if_name != "cpuDataPort") {
         // pass it along to our super class
         return MemObject::getMasterPort(if_name, idx);
     } else {
         if (if_name == "cpuInstPort") {
-            if (idx > m_num_inst_ports) {
-                panic("RubyTester::getMasterPort: unknown inst port idx %d\n",
+            if (idx > m_num_inst_only_ports) {
+                panic("RubyTester::getMasterPort: unknown inst port %d\n",
                       idx);
             }
             //
-            // inst ports directly map to the lowest readPort elements
+            // inst ports map to the lowest readPort elements
             //
             return *readPorts[idx];
+        } else if (if_name == "cpuInstDataPort") {
+            if (idx > m_num_inst_data_ports) {
+                panic("RubyTester::getMasterPort: unknown inst+data port %d\n",
+                      idx);
+            }
+            int read_idx = idx + m_num_inst_only_ports;
+            //
+            // inst+data ports map to the next readPort elements
+            //
+            return *readPorts[read_idx];
         } else {
             assert(if_name == "cpuDataPort");
             //
-            // add the inst port offset to translate to the correct read port
-            // index
+            // data only ports map to the final readPort elements
             //
-            int read_idx = idx + m_num_inst_ports;
-            if (read_idx >= static_cast<PortID>(readPorts.size())) {
-                panic("RubyTester::getMasterPort: unknown data port idx %d\n",
+            if (idx > (static_cast<int>(readPorts.size()) -
+                       (m_num_inst_only_ports + m_num_inst_data_ports))) {
+                panic("RubyTester::getMasterPort: unknown data port %d\n",
                       idx);
             }
+            int read_idx = idx + m_num_inst_only_ports + m_num_inst_data_ports;
             return *readPorts[read_idx];
         }
+        // Note: currently the Ruby Tester does not support write only ports
+        // but that could easily be added here
     }
 }
 
@@ -152,7 +177,7 @@ RubyTester::CpuPort::recvTimingResp(PacketPtr pkt)
         safe_cast<RubyTester::SenderState*>(pkt->senderState);
     SubBlock& subblock = senderState->subBlock;
 
-    tester->hitCallback(id, &subblock);
+    tester->hitCallback(globalIdx, &subblock);
 
     // Now that the tester has completed, delete the senderState
     // (includes sublock) and the packet, then return
@@ -163,9 +188,16 @@ RubyTester::CpuPort::recvTimingResp(PacketPtr pkt)
 }
 
 bool
-RubyTester::isInstReadableCpuPort(int idx)
+RubyTester::isInstOnlyCpuPort(int idx)
+{
+    return idx < m_num_inst_only_ports;
+}
+
+bool
+RubyTester::isInstDataCpuPort(int idx)
 {
-    return idx < m_num_inst_ports;
+    return ((idx >= m_num_inst_only_ports) &&
+            (idx < (m_num_inst_only_ports + m_num_inst_data_ports)));
 }
 
 MasterPort*
@@ -190,13 +222,13 @@ RubyTester::hitCallback(NodeID proc, SubBlock* data)
     // Mark that we made progress
     m_last_progress_vector[proc] = curCycle();
 
-    DPRINTF(RubyTest, "completed request for proc: %d\n", proc);
-    DPRINTF(RubyTest, "addr: 0x%x, size: %d, data: ",
+    DPRINTF(RubyTest, "completed request for proc: %d", proc);
+    DPRINTFR(RubyTest, " addr: 0x%x, size: %d, data: ",
             data->getAddress(), data->getSize());
     for (int byte = 0; byte < data->getSize(); byte++) {
-        DPRINTF(RubyTest, "%d", data->getByte(byte));
+        DPRINTFR(RubyTest, "%d ", data->getByte(byte));
     }
-    DPRINTF(RubyTest, "\n");
+    DPRINTFR(RubyTest, "\n");
 
     // This tells us our store has 'completed' or for a load gives us
     // back the data to make the check
diff --git a/src/cpu/testers/rubytest/RubyTester.hh b/src/cpu/testers/rubytest/RubyTester.hh
index 94a982e32..39e6d78a3 100644
--- a/src/cpu/testers/rubytest/RubyTester.hh
+++ b/src/cpu/testers/rubytest/RubyTester.hh
@@ -60,6 +60,8 @@ class RubyTester : public MemObject
     {
       private:
         RubyTester *tester;
+        // index for m_last_progress_vector and hitCallback
+        PortID globalIdx;
 
       public:
         //
@@ -68,8 +70,10 @@ class RubyTester : public MemObject
         // RubyPorts that support both types of requests, separate InstOnly
         // and DataOnly CpuPorts will map to that RubyPort
 
-        CpuPort(const std::string &_name, RubyTester *_tester, PortID _id)
-            : MasterPort(_name, _tester, _id), tester(_tester)
+        CpuPort(const std::string &_name, RubyTester *_tester, PortID _id,
+                PortID _index)
+            : MasterPort(_name, _tester, _id), tester(_tester),
+              globalIdx(_index)
         {}
 
       protected:
@@ -93,7 +97,8 @@ class RubyTester : public MemObject
     virtual BaseMasterPort &getMasterPort(const std::string &if_name,
                                           PortID idx = InvalidPortID);
 
-    bool isInstReadableCpuPort(int idx);
+    bool isInstOnlyCpuPort(int idx);
+    bool isInstDataCpuPort(int idx);
 
     MasterPort* getReadableCpuPort(int idx);
     MasterPort* getWritableCpuPort(int idx);
@@ -152,7 +157,8 @@ class RubyTester : public MemObject
     int m_num_readers;
     int m_wakeup_frequency;
     bool m_check_flush;
-    int m_num_inst_ports;
+    int m_num_inst_only_ports;
+    int m_num_inst_data_ports;
 };
 
 inline std::ostream&
diff --git a/src/cpu/testers/rubytest/RubyTester.py b/src/cpu/testers/rubytest/RubyTester.py
index 7af70cae0..f12485566 100644
--- a/src/cpu/testers/rubytest/RubyTester.py
+++ b/src/cpu/testers/rubytest/RubyTester.py
@@ -34,8 +34,9 @@ class RubyTester(MemObject):
     type = 'RubyTester'
     cxx_header = "cpu/testers/rubytest/RubyTester.hh"
     num_cpus = Param.Int("number of cpus / RubyPorts")
-    cpuDataPort = VectorMasterPort("the cpu data cache ports")
-    cpuInstPort = VectorMasterPort("the cpu inst cache ports")
+    cpuInstDataPort = VectorMasterPort("cpu combo ports to inst & data caches")
+    cpuInstPort = VectorMasterPort("cpu ports to only inst caches")
+    cpuDataPort = VectorMasterPort("cpu ports to only data caches")
     checks_to_complete = Param.Int(100, "checks to complete")
     deadlock_threshold = Param.Int(50000, "how often to check for deadlock")
     wakeup_frequency = Param.Int(10, "number of cycles between wakeups")
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index b2fb8d72d..95a83873c 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -11,7 +11,7 @@
  * unmodified and in its entirety in all distributions of the software,
  * modified or unmodified, in source code or in binary form.
  *
- * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ * Copyright (c) 2009-2013 Advanced Micro Devices, Inc.
  * Copyright (c) 2011 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
@@ -58,7 +58,8 @@ RubyPort::RubyPort(const Params *p)
       pioSlavePort(csprintf("%s.pio-slave-port", name()), this),
       memMasterPort(csprintf("%s.mem-master-port", name()), this),
       memSlavePort(csprintf("%s-mem-slave-port", name()), this,
-                   p->ruby_system->getAccessBackingStore(), -1),
+                   p->ruby_system->getAccessBackingStore(), -1,
+                   p->no_retry_on_stall),
       gotAddrRanges(p->port_master_connection_count)
 {
     assert(m_version != -1);
@@ -66,7 +67,8 @@ RubyPort::RubyPort(const Params *p)
     // create the slave ports based on the number of connected ports
     for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
         slave_ports.push_back(new MemSlavePort(csprintf("%s.slave%d", name(),
-            i), this, p->ruby_system->getAccessBackingStore(), i));
+            i), this, p->ruby_system->getAccessBackingStore(),
+            i, p->no_retry_on_stall));
     }
 
     // create the master ports based on the number of connected ports
@@ -156,9 +158,11 @@ RubyPort::MemMasterPort::MemMasterPort(const std::string &_name,
 }
 
 RubyPort::MemSlavePort::MemSlavePort(const std::string &_name, RubyPort *_port,
-                                     bool _access_backing_store, PortID id)
+                                     bool _access_backing_store, PortID id,
+                                     bool _no_retry_on_stall)
     : QueuedSlavePort(_name, _port, queue, id), queue(*_port, *this),
-      access_backing_store(_access_backing_store)
+      access_backing_store(_access_backing_store),
+      no_retry_on_stall(_no_retry_on_stall)
 {
     DPRINTF(RubyPort, "Created slave memport on ruby sequencer %s\n", _name);
 }
@@ -267,21 +271,31 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
         return true;
     }
 
-    //
-    // Unless one is using the ruby tester, record the stalled M5 port for
-    // later retry when the sequencer becomes free.
-    //
-    if (!ruby_port->m_usingRubyTester) {
-        ruby_port->addToRetryList(this);
-    }
 
     DPRINTF(RubyPort, "Request for address %#x did not issued because %s\n",
             pkt->getAddr(), RequestStatus_to_string(requestStatus));
 
+    addToRetryList();
+
     return false;
 }
 
 void
+RubyPort::MemSlavePort::addToRetryList()
+{
+    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
+
+    //
+    // Unless the requestor does not want retries (e.g., the Ruby tester),
+    // record the stalled M5 port for later retry when the sequencer
+    // becomes free.
+    //
+    if (!no_retry_on_stall && !ruby_port->onRetryList(this)) {
+        ruby_port->addToRetryList(this);
+    }
+}
+
+void
 RubyPort::MemSlavePort::recvFunctional(PacketPtr pkt)
 {
     DPRINTF(RubyPort, "Functional access for address: %#x\n", pkt->getAddr());
@@ -356,31 +370,33 @@ RubyPort::ruby_hit_callback(PacketPtr pkt)
 
     port->hitCallback(pkt);
 
+    trySendRetries();
+}
+
+void
+RubyPort::trySendRetries()
+{
     //
     // If we had to stall the MemSlavePorts, wake them up because the sequencer
     // likely has free resources now.
     //
     if (!retryList.empty()) {
-        //
-        // Record the current list of ports to retry on a temporary list before
-        // calling sendRetry on those ports.  sendRetry will cause an
-        // immediate retry, which may result in the ports being put back on the
-        // list. Therefore we want to clear the retryList before calling
-        // sendRetry.
-        //
+        // Record the current list of ports to retry on a temporary list
+        // before calling sendRetryReq on those ports. sendRetryReq will cause
+        // an immediate retry, which may result in the ports being put back on
+        // the list. Therefore we want to clear the retryList before calling
+        // sendRetryReq.
         std::vector<MemSlavePort *> curRetryList(retryList);
 
         retryList.clear();
 
         for (auto i = curRetryList.begin(); i != curRetryList.end(); ++i) {
             DPRINTF(RubyPort,
-                    "Sequencer may now be free.  SendRetry to port %s\n",
+                    "Sequencer may now be free. SendRetry to port %s\n",
                     (*i)->name());
             (*i)->sendRetryReq();
         }
     }
-
-    testDrainComplete();
 }
 
 void
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index 58d2558dd..07e0fde5a 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -11,7 +11,7 @@
  * unmodified and in its entirety in all distributions of the software,
  * modified or unmodified, in source code or in binary form.
  *
- * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ * Copyright (c) 2009-2013 Advanced Micro Devices, Inc.
  * Copyright (c) 2011 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
@@ -76,10 +76,12 @@ class RubyPort : public MemObject
       private:
         RespPacketQueue queue;
         bool access_backing_store;
+        bool no_retry_on_stall;
 
       public:
         MemSlavePort(const std::string &_name, RubyPort *_port,
-                     bool _access_backing_store, PortID id);
+                     bool _access_backing_store,
+                     PortID id, bool _no_retry_on_stall);
         void hitCallback(PacketPtr pkt);
         void evictionCallback(Addr address);
 
@@ -94,6 +96,8 @@ class RubyPort : public MemObject
         AddrRangeList getAddrRanges() const
         { AddrRangeList ranges; return ranges; }
 
+        void addToRetryList();
+
       private:
         bool isPhysMemAddress(Addr addr) const;
     };
@@ -164,6 +168,7 @@ class RubyPort : public MemObject
     DrainState drain() override;
 
   protected:
+    void trySendRetries();
     void ruby_hit_callback(PacketPtr pkt);
     void testDrainComplete();
     void ruby_eviction_callback(Addr address);
@@ -186,10 +191,14 @@ class RubyPort : public MemObject
     System* system;
 
   private:
+    bool onRetryList(MemSlavePort * port)
+    {
+        return (std::find(retryList.begin(), retryList.end(), port) !=
+                retryList.end());
+    }
     void addToRetryList(MemSlavePort * port)
     {
-        if (std::find(retryList.begin(), retryList.end(), port) !=
-               retryList.end()) return;
+        if (onRetryList(port)) return;
         retryList.push_back(port);
     }
 
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 26db6b6f8..50418c700 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -491,6 +491,7 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
         rs->m_cache_recorder->enqueueNextFlushRequest();
     } else {
         ruby_hit_callback(pkt);
+        testDrainComplete();
     }
 }
 
diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py
index 7494986e9..7c90eb29c 100644
--- a/src/mem/ruby/system/Sequencer.py
+++ b/src/mem/ruby/system/Sequencer.py
@@ -45,6 +45,7 @@ class RubyPort(MemObject):
     mem_slave_port = SlavePort("Ruby memory port")
 
     using_ruby_tester = Param.Bool(False, "")
+    no_retry_on_stall = Param.Bool(False, "")
     ruby_system = Param.RubySystem(Parent.any, "")
     system = Param.System(Parent.any, "system object")
     support_data_reqs = Param.Bool(True, "data cache requests supported")
author	Brad Beckmann <Brad.Beckmann@amd.com>	2015-07-20 09:15:18 -0500
committer	Brad Beckmann <Brad.Beckmann@amd.com>	2015-07-20 09:15:18 -0500
commit	173a7869219534de5053889a84e1006281ec7645 (patch)
tree	0cbd44d0669204d4fb3a551e98b1b668f8e6bee4
parent	4e6241007c514c3f90e9aeebf7cfd92853e45850 (diff)
download	gem5-173a7869219534de5053889a84e1006281ec7645.tar.xz