diff options
-rw-r--r-- | configs/example/ruby_random_test.py | 13 | ||||
-rw-r--r-- | configs/ruby/MESI_Three_Level.py | 82 | ||||
-rw-r--r-- | configs/ruby/MESI_Two_Level.py | 48 | ||||
-rw-r--r-- | configs/ruby/MI_example.py | 37 | ||||
-rw-r--r-- | configs/ruby/MOESI_CMP_directory.py | 37 | ||||
-rw-r--r-- | configs/ruby/MOESI_CMP_token.py | 54 | ||||
-rw-r--r-- | configs/ruby/MOESI_hammer.py | 40 | ||||
-rw-r--r-- | src/cpu/testers/rubytest/Check.cc | 23 | ||||
-rw-r--r-- | src/cpu/testers/rubytest/CheckTable.cc | 7 | ||||
-rw-r--r-- | src/cpu/testers/rubytest/RubyTester.cc | 70 | ||||
-rw-r--r-- | src/cpu/testers/rubytest/RubyTester.hh | 14 | ||||
-rw-r--r-- | src/cpu/testers/rubytest/RubyTester.py | 5 | ||||
-rw-r--r-- | src/mem/ruby/system/RubyPort.cc | 60 | ||||
-rw-r--r-- | src/mem/ruby/system/RubyPort.hh | 17 | ||||
-rw-r--r-- | src/mem/ruby/system/Sequencer.cc | 1 | ||||
-rw-r--r-- | src/mem/ruby/system/Sequencer.py | 1 |
16 files changed, 326 insertions, 183 deletions
diff --git a/configs/example/ruby_random_test.py b/configs/example/ruby_random_test.py index 225b3d23b..10d4318c7 100644 --- a/configs/example/ruby_random_test.py +++ b/configs/example/ruby_random_test.py @@ -125,10 +125,15 @@ for ruby_port in system.ruby._cpu_ports: # # Tie the ruby tester ports to the ruby cpu read and write ports # - if ruby_port.support_data_reqs: - tester.cpuDataPort = ruby_port.slave - if ruby_port.support_inst_reqs: - tester.cpuInstPort = ruby_port.slave + if ruby_port.support_data_reqs and ruby_port.support_inst_reqs: + tester.cpuInstDataPort = ruby_port.slave + elif ruby_port.support_data_reqs: + tester.cpuDataPort = ruby_port.slave + elif ruby_port.support_inst_reqs: + tester.cpuInstPort = ruby_port.slave + + # Do not automatically retry stalled Ruby requests + ruby_port.no_retry_on_stall = True # # Tell each sequencer this is the ruby tester so that it diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py index 9db9d27cd..1d4b6ebf6 100644 --- a/configs/ruby/MESI_Three_Level.py +++ b/configs/ruby/MESI_Three_Level.py @@ -1,5 +1,5 @@ # Copyright (c) 2006-2007 The Regents of The University of Michigan -# Copyright (c) 2009 Advanced Micro Devices, Inc. +# Copyright (c) 2009,2015 Advanced Micro Devices, Inc. # Copyright (c) 2013 Mark D. Hill and David A. Wood # All rights reserved. # @@ -44,22 +44,24 @@ class L1Cache(RubyCache): pass class L2Cache(RubyCache): pass def define_options(parser): - parser.add_option("--num-clusters", type="int", default=1, - help="number of clusters in a design in which there are shared\ + parser.add_option("--num-clusters", type = "int", default = 1, + help = "number of clusters in a design in which there are shared\ caches private to clusters") return def create_system(options, full_system, system, dma_ports, ruby_system): if buildEnv['PROTOCOL'] != 'MESI_Three_Level': - fatal("This script requires the MESI_Three_Level protocol to be built.") + fatal("This script requires the MESI_Three_Level protocol to be\ + built.") cpu_sequencers = [] # # The ruby network creation expects the list of nodes in the system to be - # consistent with the NetDest list. Therefore the l1 controller nodes must be - # listed before the directory nodes and directory nodes before dma nodes, etc. + # consistent with the NetDest list. Therefore the l1 controller nodes + # must be listed before the directory nodes and directory nodes before + # dma nodes, etc. # l0_cntrl_nodes = [] l1_cntrl_nodes = [] @@ -94,30 +96,45 @@ def create_system(options, full_system, system, dma_ports, ruby_system): start_index_bit = block_size_bits, replacement_policy = LRUReplacementPolicy()) - l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j, - Icache = l0i_cache, Dcache = l0d_cache, - send_evictions = send_evicts(options), - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + l0_cntrl = L0Cache_Controller( + version = i * num_cpus_per_cluster + j, Icache = l0i_cache, + Dcache = l0d_cache, send_evictions = send_evicts(options), + clk_domain = clk_domain, ruby_system = ruby_system) cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j, - icache = l0i_cache, - clk_domain=system.cpu[i].clk_domain, - dcache = l0d_cache, ruby_system = ruby_system) + icache = l0i_cache, + clk_domain = clk_domain, + dcache = l0d_cache, + ruby_system = ruby_system) l0_cntrl.sequencer = cpu_seq - l1_cache = L1Cache(size = options.l1d_size, assoc = options.l1d_assoc, - start_index_bit = block_size_bits, is_icache = False) + l1_cache = L1Cache(size = options.l1d_size, + assoc = options.l1d_assoc, + start_index_bit = block_size_bits, + is_icache = False) - l1_cntrl = L1Cache_Controller(version = i*num_cpus_per_cluster+j, - cache = l1_cache, l2_select_num_bits = l2_bits, - cluster_id = i, ruby_system = ruby_system) + l1_cntrl = L1Cache_Controller( + version = i * num_cpus_per_cluster + j, + cache = l1_cache, l2_select_num_bits = l2_bits, + cluster_id = i, ruby_system = ruby_system) - exec("ruby_system.l0_cntrl%d = l0_cntrl" % ( - i*num_cpus_per_cluster+j)) - exec("ruby_system.l1_cntrl%d = l1_cntrl" % ( - i*num_cpus_per_cluster+j)) + exec("ruby_system.l0_cntrl%d = l0_cntrl" + % ( i * num_cpus_per_cluster + j)) + exec("ruby_system.l1_cntrl%d = l1_cntrl" + % ( i * num_cpus_per_cluster + j)) # # Add controllers and sequencers to the appropriate lists @@ -155,11 +172,11 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l2_cntrl = L2Cache_Controller( version = i * num_l2caches_per_cluster + j, L2cache = l2_cache, cluster_id = i, - transitions_per_cycle=options.ports, + transitions_per_cycle = options.ports, ruby_system = ruby_system) - exec("ruby_system.l2_cntrl%d = l2_cntrl" % ( - i * num_l2caches_per_cluster + j)) + exec("ruby_system.l2_cntrl%d = l2_cntrl" + % (i * num_l2caches_per_cluster + j)) l2_cntrl_nodes.append(l2_cntrl) # Connect the L2 controllers and the network @@ -185,8 +202,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # the ruby system # clk_divider value is a fix to pass regression. ruby_system.memctrl_clk_domain = DerivedClockDomain( - clk_domain=ruby_system.clk_domain, - clk_divider=3) + clk_domain = ruby_system.clk_domain, clk_divider = 3) for i in xrange(options.num_dirs): # @@ -196,10 +212,9 @@ def create_system(options, full_system, system, dma_ports, ruby_system): dir_size.value = mem_module_size dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + directory = RubyDirectoryMemory(version = i, size = dir_size), + transitions_per_cycle = options.ports, + ruby_system = ruby_system) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) @@ -217,8 +232,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # # Create the Ruby objects associated with the dma controller # - dma_seq = DMASequencer(version = i, - ruby_system = ruby_system) + dma_seq = DMASequencer(version = i, ruby_system = ruby_system) dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py index 9997a117b..4cfa54bd8 100644 --- a/configs/ruby/MESI_Two_Level.py +++ b/configs/ruby/MESI_Two_Level.py @@ -82,23 +82,33 @@ def create_system(options, full_system, system, dma_ports, ruby_system): prefetcher = RubyPrefetcher.Prefetcher() - l1_cntrl = L1Cache_Controller(version = i, - L1Icache = l1i_cache, + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + l1_cntrl = L1Cache_Controller(version = i, L1Icache = l1i_cache, L1Dcache = l1d_cache, l2_select_num_bits = l2_bits, send_evictions = send_evicts(options), prefetcher = prefetcher, ruby_system = ruby_system, - clk_domain=system.cpu[i].clk_domain, - transitions_per_cycle=options.ports, + clk_domain = clk_domain, + transitions_per_cycle = options.ports, enable_prefetch = False) - cpu_seq = RubySequencer(version = i, - icache = l1i_cache, - dcache = l1d_cache, - clk_domain=system.cpu[i].clk_domain, + cpu_seq = RubySequencer(version = i, icache = l1i_cache, + dcache = l1d_cache, clk_domain = clk_domain, ruby_system = ruby_system) + l1_cntrl.sequencer = cpu_seq exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) @@ -135,7 +145,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l2_cntrl = L2Cache_Controller(version = i, L2cache = l2_cache, - transitions_per_cycle=options.ports, + transitions_per_cycle = options.ports, ruby_system = ruby_system) exec("ruby_system.l2_cntrl%d = l2_cntrl" % i) @@ -166,18 +176,17 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # the ruby system # clk_divider value is a fix to pass regression. ruby_system.memctrl_clk_domain = DerivedClockDomain( - clk_domain=ruby_system.clk_domain, - clk_divider=3) + clk_domain = ruby_system.clk_domain, + clk_divider = 3) for i in xrange(options.num_dirs): dir_size = MemorySize('0B') dir_size.value = mem_module_size dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + directory = RubyDirectoryMemory(version = i, size = dir_size), + transitions_per_cycle = options.ports, + ruby_system = ruby_system) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) @@ -194,12 +203,10 @@ def create_system(options, full_system, system, dma_ports, ruby_system): for i, dma_port in enumerate(dma_ports): # Create the Ruby objects associated with the dma controller - dma_seq = DMASequencer(version = i, - ruby_system = ruby_system, + dma_seq = DMASequencer(version = i, ruby_system = ruby_system, slave = dma_port) - dma_cntrl = DMA_Controller(version = i, - dma_sequencer = dma_seq, + dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, transitions_per_cycle = options.ports, ruby_system = ruby_system) @@ -220,7 +227,8 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # Create the io controller and the sequencer if full_system: - io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system) + io_seq = DMASequencer(version = len(dma_ports), + ruby_system = ruby_system) ruby_system._io_port = io_seq io_controller = DMA_Controller(version = len(dma_ports), dma_sequencer = io_seq, diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py index 6f28c6ade..24b0f9716 100644 --- a/configs/ruby/MI_example.py +++ b/configs/ruby/MI_example.py @@ -74,21 +74,28 @@ def create_system(options, full_system, system, dma_ports, ruby_system): assoc = options.l1d_assoc, start_index_bit = block_size_bits) - # - # Only one unified L1 cache exists. Can cache instructions and data. - # - l1_cntrl = L1Cache_Controller(version = i, - cacheMemory = cache, - send_evictions = send_evicts(options), - transitions_per_cycle = options.ports, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) - - cpu_seq = RubySequencer(version = i, - icache = cache, - dcache = cache, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) + + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + # Only one unified L1 cache exists. Can cache instructions and data. + l1_cntrl = L1Cache_Controller(version=i, cacheMemory=cache, + send_evictions=send_evicts(options), + transitions_per_cycle=options.ports, + clk_domain=clk_domain, + ruby_system=ruby_system) + + cpu_seq = RubySequencer(version=i, icache=cache, dcache=cache, + clk_domain=clk_domain, ruby_system=ruby_system) l1_cntrl.sequencer = cpu_seq exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py index cdf8688f0..a72b5b20e 100644 --- a/configs/ruby/MOESI_CMP_directory.py +++ b/configs/ruby/MOESI_CMP_directory.py @@ -80,20 +80,29 @@ def create_system(options, full_system, system, dma_ports, ruby_system): start_index_bit = block_size_bits, is_icache = False) - l1_cntrl = L1Cache_Controller(version = i, - L1Icache = l1i_cache, - L1Dcache = l1d_cache, - l2_select_num_bits = l2_bits, - send_evictions = send_evicts(options), - transitions_per_cycle = options.ports, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) - - cpu_seq = RubySequencer(version = i, - icache = l1i_cache, - dcache = l1d_cache, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache, + L1Dcache=l1d_cache, + l2_select_num_bits=l2_bits, + send_evictions=send_evicts(options), + transitions_per_cycle=options.ports, + clk_domain=clk_domain, + ruby_system=ruby_system) + + cpu_seq = RubySequencer(version=i, icache=l1i_cache, + dcache=l1d_cache, clk_domain=clk_domain, + ruby_system=ruby_system) l1_cntrl.sequencer = cpu_seq exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py index 23c6d9fef..7161544b7 100644 --- a/configs/ruby/MOESI_CMP_token.py +++ b/configs/ruby/MOESI_CMP_token.py @@ -91,29 +91,37 @@ def create_system(options, full_system, system, dma_ports, ruby_system): assoc = options.l1d_assoc, start_index_bit = block_size_bits) - l1_cntrl = L1Cache_Controller(version = i, - L1Icache = l1i_cache, - L1Dcache = l1d_cache, - l2_select_num_bits = l2_bits, - N_tokens = n_tokens, - retry_threshold = \ - options.l1_retries, - fixed_timeout_latency = \ - options.timeout_latency, - dynamic_timeout_enabled = \ - not options.disable_dyn_timeouts, - no_mig_atomic = not \ - options.allow_atomic_migration, - send_evictions = send_evicts(options), - transitions_per_cycle = options.ports, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) - - cpu_seq = RubySequencer(version = i, - icache = l1i_cache, - dcache = l1d_cache, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache, + L1Dcache=l1d_cache, + l2_select_num_bits=l2_bits, + N_tokens=n_tokens, + retry_threshold=options.l1_retries, + fixed_timeout_latency=\ + options.timeout_latency, + dynamic_timeout_enabled=\ + not options.disable_dyn_timeouts, + no_mig_atomic=not \ + options.allow_atomic_migration, + send_evictions=send_evicts(options), + transitions_per_cycle=options.ports, + clk_domain=clk_domain, + ruby_system=ruby_system) + + cpu_seq = RubySequencer(version=i, icache=l1i_cache, + dcache=l1d_cache, clk_domain=clk_domain, + ruby_system=ruby_system) l1_cntrl.sequencer = cpu_seq exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py index 0860fb682..6a1cfd70b 100644 --- a/configs/ruby/MOESI_hammer.py +++ b/configs/ruby/MOESI_hammer.py @@ -89,22 +89,30 @@ def create_system(options, full_system, system, dma_ports, ruby_system): assoc = options.l2_assoc, start_index_bit = block_size_bits) - l1_cntrl = L1Cache_Controller(version = i, - L1Icache = l1i_cache, - L1Dcache = l1d_cache, - L2cache = l2_cache, - no_mig_atomic = not \ - options.allow_atomic_migration, - send_evictions = send_evicts(options), - transitions_per_cycle = options.ports, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) - - cpu_seq = RubySequencer(version = i, - icache = l1i_cache, - dcache = l1d_cache, - clk_domain=system.cpu[i].clk_domain, - ruby_system = ruby_system) + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. + if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache, + L1Dcache=l1d_cache, L2cache=l2_cache, + no_mig_atomic=not \ + options.allow_atomic_migration, + send_evictions=send_evicts(options), + transitions_per_cycle=options.ports, + clk_domain=clk_domain, + ruby_system=ruby_system) + + cpu_seq = RubySequencer(version=i, icache=l1i_cache, + dcache=l1d_cache,clk_domain=clk_domain, + ruby_system=ruby_system) l1_cntrl.sequencer = cpu_seq if options.recycle_latency: diff --git a/src/cpu/testers/rubytest/Check.cc b/src/cpu/testers/rubytest/Check.cc index 4cdaf9b2f..c8e7816c3 100644 --- a/src/cpu/testers/rubytest/Check.cc +++ b/src/cpu/testers/rubytest/Check.cc @@ -94,7 +94,9 @@ Check::initiatePrefetch() cmd = MemCmd::ReadReq; // if necessary, make the request an instruction fetch - if (m_tester_ptr->isInstReadableCpuPort(index)) { + if (m_tester_ptr->isInstOnlyCpuPort(index) || + (m_tester_ptr->isInstDataCpuPort(index) && + (random_mt.random(0, 0x1)))) { flags.set(Request::INST_FETCH); } } else { @@ -193,7 +195,7 @@ Check::initiateAction() *writeData = m_value + m_store_count; pkt->dataDynamic(writeData); - DPRINTF(RubyTest, "data 0x%x check 0x%x\n", + DPRINTF(RubyTest, "Seq write: index %d data 0x%x check 0x%x\n", index, *(pkt->getConstPtr<uint8_t>()), *writeData); // push the subblock onto the sender state. The sequencer will @@ -205,6 +207,7 @@ Check::initiateAction() DPRINTF(RubyTest, "status before action update: %s\n", (TesterStatus_to_string(m_status)).c_str()); m_status = TesterStatus_Action_Pending; + DPRINTF(RubyTest, "Check %s, State=Action_Pending\n", m_address); } else { // If the packet did not issue, must delete // Note: No need to delete the data, the packet destructor @@ -232,7 +235,9 @@ Check::initiateCheck() Request::Flags flags; // If necessary, make the request an instruction fetch - if (m_tester_ptr->isInstReadableCpuPort(index)) { + if (m_tester_ptr->isInstOnlyCpuPort(index) || + (m_tester_ptr->isInstDataCpuPort(index) && + (random_mt.random(0, 0x1)))) { flags.set(Request::INST_FETCH); } @@ -245,6 +250,8 @@ Check::initiateCheck() uint8_t *dataArray = new uint8_t[CHECK_SIZE]; pkt->dataDynamic(dataArray); + DPRINTF(RubyTest, "Seq read: index %d\n", index); + // push the subblock onto the sender state. The sequencer will // update the subblock on the return pkt->senderState = new SenderState(m_address, req->getSize()); @@ -254,6 +261,7 @@ Check::initiateCheck() DPRINTF(RubyTest, "status before check update: %s\n", TesterStatus_to_string(m_status).c_str()); m_status = TesterStatus_Check_Pending; + DPRINTF(RubyTest, "Check %s, State=Check_Pending\n", m_address); } else { // If the packet did not issue, must delete // Note: No need to delete the data, the packet destructor @@ -291,8 +299,11 @@ Check::performCallback(NodeID proc, SubBlock* data, Cycles curTime) m_store_count++; if (m_store_count == CHECK_SIZE) { m_status = TesterStatus_Ready; + DPRINTF(RubyTest, "Check %s, State=Ready\n", m_address); } else { m_status = TesterStatus_Idle; + DPRINTF(RubyTest, "Check %s, State=Idle store_count: %d\n", + m_address, m_store_count); } DPRINTF(RubyTest, "Action callback return data now %d\n", data->getByte(0)); @@ -316,6 +327,7 @@ Check::performCallback(NodeID proc, SubBlock* data, Cycles curTime) m_tester_ptr->incrementCheckCompletions(); m_status = TesterStatus_Idle; + DPRINTF(RubyTest, "Check %s, State=Idle\n", m_address); pickValue(); } else { @@ -335,6 +347,7 @@ Check::changeAddress(Addr address) assert(m_status == TesterStatus_Idle || m_status == TesterStatus_Ready); m_status = TesterStatus_Idle; m_address = address; + DPRINTF(RubyTest, "Check %s, State=Idle\n", m_address); m_store_count = 0; } @@ -342,7 +355,6 @@ void Check::pickValue() { assert(m_status == TesterStatus_Idle); - m_status = TesterStatus_Idle; m_value = random_mt.random(0, 0xff); // One byte m_store_count = 0; } @@ -353,7 +365,8 @@ Check::pickInitiatingNode() assert(m_status == TesterStatus_Idle || m_status == TesterStatus_Ready); m_status = TesterStatus_Idle; m_initiatingNode = (random_mt.random(0, m_num_writers - 1)); - DPRINTF(RubyTest, "picked initiating node %d\n", m_initiatingNode); + DPRINTF(RubyTest, "Check %s, State=Idle, picked initiating node %d\n", + m_address, m_initiatingNode); m_store_count = 0; } diff --git a/src/cpu/testers/rubytest/CheckTable.cc b/src/cpu/testers/rubytest/CheckTable.cc index b75fd0a52..3bdd73f27 100644 --- a/src/cpu/testers/rubytest/CheckTable.cc +++ b/src/cpu/testers/rubytest/CheckTable.cc @@ -42,6 +42,7 @@ CheckTable::CheckTable(int _num_writers, int _num_readers, RubyTester* _tester) const int size1 = 32; const int size2 = 100; + DPRINTF(RubyTest, "Adding false sharing checks\n"); // The first set is to get some false sharing physical = 1000; for (int i = 0; i < size1; i++) { @@ -50,6 +51,7 @@ CheckTable::CheckTable(int _num_writers, int _num_readers, RubyTester* _tester) physical += CHECK_SIZE; } + DPRINTF(RubyTest, "Adding cache conflict checks\n"); // The next two sets are to get some limited false sharing and // cache conflicts physical = 1000; @@ -59,6 +61,7 @@ CheckTable::CheckTable(int _num_writers, int _num_readers, RubyTester* _tester) physical += 256; } + DPRINTF(RubyTest, "Adding cache conflict checks2\n"); physical = 1000 + CHECK_SIZE; for (int i = 0; i < size2; i++) { // Setup linear addresses @@ -91,6 +94,8 @@ CheckTable::addCheck(Addr address) } } + DPRINTF(RubyTest, "Adding check for address: %s\n", address); + Check* check_ptr = new Check(address, 100 + m_check_vector.size(), m_num_writers, m_num_readers, m_tester_ptr); for (int i = 0; i < CHECK_SIZE; i++) { @@ -110,7 +115,7 @@ CheckTable::getRandomCheck() Check* CheckTable::getCheck(const Addr address) { - DPRINTF(RubyTest, "Looking for check by address: %s", address); + DPRINTF(RubyTest, "Looking for check by address: %s\n", address); auto i = m_lookup_map.find(address); diff --git a/src/cpu/testers/rubytest/RubyTester.cc b/src/cpu/testers/rubytest/RubyTester.cc index e0f30f552..5ed6d7f66 100644 --- a/src/cpu/testers/rubytest/RubyTester.cc +++ b/src/cpu/testers/rubytest/RubyTester.cc @@ -58,7 +58,8 @@ RubyTester::RubyTester(const Params *p) m_num_readers(0), m_wakeup_frequency(p->wakeup_frequency), m_check_flush(p->check_flush), - m_num_inst_ports(p->port_cpuInstPort_connection_count) + m_num_inst_only_ports(p->port_cpuInstPort_connection_count), + m_num_inst_data_ports(p->port_cpuInstDataPort_connection_count) { m_checks_completed = 0; @@ -73,15 +74,25 @@ RubyTester::RubyTester(const Params *p) // Note: the inst ports are the lowest elements of the readPort vector, // then the data ports are added to the readPort vector // + int idx = 0; for (int i = 0; i < p->port_cpuInstPort_connection_count; ++i) { readPorts.push_back(new CpuPort(csprintf("%s-instPort%d", name(), i), - this, i)); + this, i, idx)); + idx++; + } + for (int i = 0; i < p->port_cpuInstDataPort_connection_count; ++i) { + CpuPort *port = new CpuPort(csprintf("%s-instDataPort%d", name(), i), + this, i, idx); + readPorts.push_back(port); + writePorts.push_back(port); + idx++; } for (int i = 0; i < p->port_cpuDataPort_connection_count; ++i) { CpuPort *port = new CpuPort(csprintf("%s-dataPort%d", name(), i), - this, i); + this, i, idx); readPorts.push_back(port); writePorts.push_back(port); + idx++; } // add the check start event to the event queue @@ -108,6 +119,7 @@ RubyTester::init() m_num_writers = writePorts.size(); m_num_readers = readPorts.size(); + assert(m_num_readers == m_num_cpus); m_checkTable_ptr = new CheckTable(m_num_writers, m_num_readers, this); } @@ -115,32 +127,45 @@ RubyTester::init() BaseMasterPort & RubyTester::getMasterPort(const std::string &if_name, PortID idx) { - if (if_name != "cpuInstPort" && if_name != "cpuDataPort") { + if (if_name != "cpuInstPort" && if_name != "cpuInstDataPort" && + if_name != "cpuDataPort") { // pass it along to our super class return MemObject::getMasterPort(if_name, idx); } else { if (if_name == "cpuInstPort") { - if (idx > m_num_inst_ports) { - panic("RubyTester::getMasterPort: unknown inst port idx %d\n", + if (idx > m_num_inst_only_ports) { + panic("RubyTester::getMasterPort: unknown inst port %d\n", idx); } // - // inst ports directly map to the lowest readPort elements + // inst ports map to the lowest readPort elements // return *readPorts[idx]; + } else if (if_name == "cpuInstDataPort") { + if (idx > m_num_inst_data_ports) { + panic("RubyTester::getMasterPort: unknown inst+data port %d\n", + idx); + } + int read_idx = idx + m_num_inst_only_ports; + // + // inst+data ports map to the next readPort elements + // + return *readPorts[read_idx]; } else { assert(if_name == "cpuDataPort"); // - // add the inst port offset to translate to the correct read port - // index + // data only ports map to the final readPort elements // - int read_idx = idx + m_num_inst_ports; - if (read_idx >= static_cast<PortID>(readPorts.size())) { - panic("RubyTester::getMasterPort: unknown data port idx %d\n", + if (idx > (static_cast<int>(readPorts.size()) - + (m_num_inst_only_ports + m_num_inst_data_ports))) { + panic("RubyTester::getMasterPort: unknown data port %d\n", idx); } + int read_idx = idx + m_num_inst_only_ports + m_num_inst_data_ports; return *readPorts[read_idx]; } + // Note: currently the Ruby Tester does not support write only ports + // but that could easily be added here } } @@ -152,7 +177,7 @@ RubyTester::CpuPort::recvTimingResp(PacketPtr pkt) safe_cast<RubyTester::SenderState*>(pkt->senderState); SubBlock& subblock = senderState->subBlock; - tester->hitCallback(id, &subblock); + tester->hitCallback(globalIdx, &subblock); // Now that the tester has completed, delete the senderState // (includes sublock) and the packet, then return @@ -163,9 +188,16 @@ RubyTester::CpuPort::recvTimingResp(PacketPtr pkt) } bool -RubyTester::isInstReadableCpuPort(int idx) +RubyTester::isInstOnlyCpuPort(int idx) +{ + return idx < m_num_inst_only_ports; +} + +bool +RubyTester::isInstDataCpuPort(int idx) { - return idx < m_num_inst_ports; + return ((idx >= m_num_inst_only_ports) && + (idx < (m_num_inst_only_ports + m_num_inst_data_ports))); } MasterPort* @@ -190,13 +222,13 @@ RubyTester::hitCallback(NodeID proc, SubBlock* data) // Mark that we made progress m_last_progress_vector[proc] = curCycle(); - DPRINTF(RubyTest, "completed request for proc: %d\n", proc); - DPRINTF(RubyTest, "addr: 0x%x, size: %d, data: ", + DPRINTF(RubyTest, "completed request for proc: %d", proc); + DPRINTFR(RubyTest, " addr: 0x%x, size: %d, data: ", data->getAddress(), data->getSize()); for (int byte = 0; byte < data->getSize(); byte++) { - DPRINTF(RubyTest, "%d", data->getByte(byte)); + DPRINTFR(RubyTest, "%d ", data->getByte(byte)); } - DPRINTF(RubyTest, "\n"); + DPRINTFR(RubyTest, "\n"); // This tells us our store has 'completed' or for a load gives us // back the data to make the check diff --git a/src/cpu/testers/rubytest/RubyTester.hh b/src/cpu/testers/rubytest/RubyTester.hh index 94a982e32..39e6d78a3 100644 --- a/src/cpu/testers/rubytest/RubyTester.hh +++ b/src/cpu/testers/rubytest/RubyTester.hh @@ -60,6 +60,8 @@ class RubyTester : public MemObject { private: RubyTester *tester; + // index for m_last_progress_vector and hitCallback + PortID globalIdx; public: // @@ -68,8 +70,10 @@ class RubyTester : public MemObject // RubyPorts that support both types of requests, separate InstOnly // and DataOnly CpuPorts will map to that RubyPort - CpuPort(const std::string &_name, RubyTester *_tester, PortID _id) - : MasterPort(_name, _tester, _id), tester(_tester) + CpuPort(const std::string &_name, RubyTester *_tester, PortID _id, + PortID _index) + : MasterPort(_name, _tester, _id), tester(_tester), + globalIdx(_index) {} protected: @@ -93,7 +97,8 @@ class RubyTester : public MemObject virtual BaseMasterPort &getMasterPort(const std::string &if_name, PortID idx = InvalidPortID); - bool isInstReadableCpuPort(int idx); + bool isInstOnlyCpuPort(int idx); + bool isInstDataCpuPort(int idx); MasterPort* getReadableCpuPort(int idx); MasterPort* getWritableCpuPort(int idx); @@ -152,7 +157,8 @@ class RubyTester : public MemObject int m_num_readers; int m_wakeup_frequency; bool m_check_flush; - int m_num_inst_ports; + int m_num_inst_only_ports; + int m_num_inst_data_ports; }; inline std::ostream& diff --git a/src/cpu/testers/rubytest/RubyTester.py b/src/cpu/testers/rubytest/RubyTester.py index 7af70cae0..f12485566 100644 --- a/src/cpu/testers/rubytest/RubyTester.py +++ b/src/cpu/testers/rubytest/RubyTester.py @@ -34,8 +34,9 @@ class RubyTester(MemObject): type = 'RubyTester' cxx_header = "cpu/testers/rubytest/RubyTester.hh" num_cpus = Param.Int("number of cpus / RubyPorts") - cpuDataPort = VectorMasterPort("the cpu data cache ports") - cpuInstPort = VectorMasterPort("the cpu inst cache ports") + cpuInstDataPort = VectorMasterPort("cpu combo ports to inst & data caches") + cpuInstPort = VectorMasterPort("cpu ports to only inst caches") + cpuDataPort = VectorMasterPort("cpu ports to only data caches") checks_to_complete = Param.Int(100, "checks to complete") deadlock_threshold = Param.Int(50000, "how often to check for deadlock") wakeup_frequency = Param.Int(10, "number of cycles between wakeups") diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index b2fb8d72d..95a83873c 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -11,7 +11,7 @@ * unmodified and in its entirety in all distributions of the software, * modified or unmodified, in source code or in binary form. * - * Copyright (c) 2009 Advanced Micro Devices, Inc. + * Copyright (c) 2009-2013 Advanced Micro Devices, Inc. * Copyright (c) 2011 Mark D. Hill and David A. Wood * All rights reserved. * @@ -58,7 +58,8 @@ RubyPort::RubyPort(const Params *p) pioSlavePort(csprintf("%s.pio-slave-port", name()), this), memMasterPort(csprintf("%s.mem-master-port", name()), this), memSlavePort(csprintf("%s-mem-slave-port", name()), this, - p->ruby_system->getAccessBackingStore(), -1), + p->ruby_system->getAccessBackingStore(), -1, + p->no_retry_on_stall), gotAddrRanges(p->port_master_connection_count) { assert(m_version != -1); @@ -66,7 +67,8 @@ RubyPort::RubyPort(const Params *p) // create the slave ports based on the number of connected ports for (size_t i = 0; i < p->port_slave_connection_count; ++i) { slave_ports.push_back(new MemSlavePort(csprintf("%s.slave%d", name(), - i), this, p->ruby_system->getAccessBackingStore(), i)); + i), this, p->ruby_system->getAccessBackingStore(), + i, p->no_retry_on_stall)); } // create the master ports based on the number of connected ports @@ -156,9 +158,11 @@ RubyPort::MemMasterPort::MemMasterPort(const std::string &_name, } RubyPort::MemSlavePort::MemSlavePort(const std::string &_name, RubyPort *_port, - bool _access_backing_store, PortID id) + bool _access_backing_store, PortID id, + bool _no_retry_on_stall) : QueuedSlavePort(_name, _port, queue, id), queue(*_port, *this), - access_backing_store(_access_backing_store) + access_backing_store(_access_backing_store), + no_retry_on_stall(_no_retry_on_stall) { DPRINTF(RubyPort, "Created slave memport on ruby sequencer %s\n", _name); } @@ -267,21 +271,31 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt) return true; } - // - // Unless one is using the ruby tester, record the stalled M5 port for - // later retry when the sequencer becomes free. - // - if (!ruby_port->m_usingRubyTester) { - ruby_port->addToRetryList(this); - } DPRINTF(RubyPort, "Request for address %#x did not issued because %s\n", pkt->getAddr(), RequestStatus_to_string(requestStatus)); + addToRetryList(); + return false; } void +RubyPort::MemSlavePort::addToRetryList() +{ + RubyPort *ruby_port = static_cast<RubyPort *>(&owner); + + // + // Unless the requestor do not want retries (e.g., the Ruby tester), + // record the stalled M5 port for later retry when the sequencer + // becomes free. + // + if (!no_retry_on_stall && !ruby_port->onRetryList(this)) { + ruby_port->addToRetryList(this); + } +} + +void RubyPort::MemSlavePort::recvFunctional(PacketPtr pkt) { DPRINTF(RubyPort, "Functional access for address: %#x\n", pkt->getAddr()); @@ -356,31 +370,33 @@ RubyPort::ruby_hit_callback(PacketPtr pkt) port->hitCallback(pkt); + trySendRetries(); +} + +void +RubyPort::trySendRetries() +{ // // If we had to stall the MemSlavePorts, wake them up because the sequencer // likely has free resources now. // if (!retryList.empty()) { - // - // Record the current list of ports to retry on a temporary list before - // calling sendRetry on those ports. sendRetry will cause an - // immediate retry, which may result in the ports being put back on the - // list. Therefore we want to clear the retryList before calling - // sendRetry. - // + // Record the current list of ports to retry on a temporary list + // before calling sendRetryReq on those ports. sendRetryReq will cause + // an immediate retry, which may result in the ports being put back on + // the list. Therefore we want to clear the retryList before calling + // sendRetryReq. std::vector<MemSlavePort *> curRetryList(retryList); retryList.clear(); for (auto i = curRetryList.begin(); i != curRetryList.end(); ++i) { DPRINTF(RubyPort, - "Sequencer may now be free. SendRetry to port %s\n", + "Sequencer may now be free. SendRetry to port %s\n", (*i)->name()); (*i)->sendRetryReq(); } } - - testDrainComplete(); } void diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index 58d2558dd..07e0fde5a 100644 --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -11,7 +11,7 @@ * unmodified and in its entirety in all distributions of the software, * modified or unmodified, in source code or in binary form. * - * Copyright (c) 2009 Advanced Micro Devices, Inc. + * Copyright (c) 2009-2013 Advanced Micro Devices, Inc. * Copyright (c) 2011 Mark D. Hill and David A. Wood * All rights reserved. * @@ -76,10 +76,12 @@ class RubyPort : public MemObject private: RespPacketQueue queue; bool access_backing_store; + bool no_retry_on_stall; public: MemSlavePort(const std::string &_name, RubyPort *_port, - bool _access_backing_store, PortID id); + bool _access_backing_store, + PortID id, bool _no_retry_on_stall); void hitCallback(PacketPtr pkt); void evictionCallback(Addr address); @@ -94,6 +96,8 @@ class RubyPort : public MemObject AddrRangeList getAddrRanges() const { AddrRangeList ranges; return ranges; } + void addToRetryList(); + private: bool isPhysMemAddress(Addr addr) const; }; @@ -164,6 +168,7 @@ class RubyPort : public MemObject DrainState drain() override; protected: + void trySendRetries(); void ruby_hit_callback(PacketPtr pkt); void testDrainComplete(); void ruby_eviction_callback(Addr address); @@ -186,10 +191,14 @@ class RubyPort : public MemObject System* system; private: + bool onRetryList(MemSlavePort * port) + { + return (std::find(retryList.begin(), retryList.end(), port) != + retryList.end()); + } void addToRetryList(MemSlavePort * port) { - if (std::find(retryList.begin(), retryList.end(), port) != - retryList.end()) return; + if (onRetryList(port)) return; retryList.push_back(port); } diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 26db6b6f8..50418c700 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -491,6 +491,7 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, rs->m_cache_recorder->enqueueNextFlushRequest(); } else { ruby_hit_callback(pkt); + testDrainComplete(); } } diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index 7494986e9..7c90eb29c 100644 --- a/src/mem/ruby/system/Sequencer.py +++ b/src/mem/ruby/system/Sequencer.py @@ -45,6 +45,7 @@ class RubyPort(MemObject): mem_slave_port = SlavePort("Ruby memory port") using_ruby_tester = Param.Bool(False, "") + no_retry_on_stall = Param.Bool(False, "") ruby_system = Param.RubySystem(Parent.any, "") system = Param.System(Parent.any, "system object") support_data_reqs = Param.Bool(True, "data cache requests supported") |