From 12db50c89584938839e035da47d206250cbfd7c2 Mon Sep 17 00:00:00 2001 From: Nikos Nikoleris Date: Mon, 13 Mar 2017 18:19:08 +0000 Subject: ruby: Add support for address ranges in the directory Previously the directory covered a flat address range that always started from address 0. This change adds a vector of address ranges with interleaving and hashing that each directory keeps track of and the necessary flexibility to support systems with non-contiguous memory ranges. Change-Id: I6ea1c629bdf4c5137b7d9c89dbaf6c826adfd977 Reviewed-by: Andreas Sandberg Reviewed-on: https://gem5-review.googlesource.com/2903 Reviewed-by: Bradford Beckmann Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power --- configs/ruby/GPU_RfO.py | 36 +++++++++++++++++------------ configs/ruby/Garnet_standalone.py | 23 ++++--------------- configs/ruby/MESI_Three_Level.py | 25 ++++----------------- configs/ruby/MESI_Two_Level.py | 23 ++++--------------- configs/ruby/MI_example.py | 18 ++++----------- configs/ruby/MOESI_AMD_Base.py | 37 ++++++++++++++++++------------ configs/ruby/MOESI_CMP_directory.py | 24 ++++---------------- configs/ruby/MOESI_CMP_token.py | 25 +++++---------------- configs/ruby/MOESI_hammer.py | 28 ++++++----------------- configs/ruby/Ruby.py | 45 ++++++++++++++++++++++++++----------- 10 files changed, 109 insertions(+), 175 deletions(-) (limited to 'configs') diff --git a/configs/ruby/GPU_RfO.py b/configs/ruby/GPU_RfO.py index 71e21d932..832ea4422 100644 --- a/configs/ruby/GPU_RfO.py +++ b/configs/ruby/GPU_RfO.py @@ -371,24 +371,14 @@ class L3Cntrl(L3Cache_Controller, CntrlBase): self.probeToL3 = probe_to_l3 self.respToL3 = resp_to_l3 -class DirMem(RubyDirectoryMemory, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - - phys_mem_size = AddrRange(options.mem_size).size() - mem_module_size = phys_mem_size / options.num_dirs - dir_size = MemorySize('0B') - dir_size.value = mem_module_size - self.size = dir_size - class 
DirCntrl(Directory_Controller, CntrlBase): - def create(self, options, ruby_system, system): + def create(self, options, dir_ranges, ruby_system, system): self.version = self.versionCount() self.response_latency = 30 - self.directory = DirMem() - self.directory.create(options, ruby_system, system) + self.addr_ranges = dir_ranges + self.directory = RubyDirectoryMemory() self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) @@ -467,10 +457,28 @@ def create_system(options, full_system, system, dma_devices, ruby_system): # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu # Clusters mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s + + if options.numa_high_bit: + numa_bit = options.numa_high_bit + else: + # if the numa_bit is not specified, set the directory bits as the + # lowest bits above the block offset bits, and the numa_bit as the + # highest of those directory bits + dir_bits = int(math.log(options.num_dirs, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + numa_bit = block_size_bits + dir_bits - 1 + for i in xrange(options.num_dirs): + dir_ranges = [] + for r in system.mem_ranges: + addr_range = m5.objects.AddrRange(r.start, size = r.size(), + intlvHighBit = numa_bit, + intlvBits = dir_bits, + intlvMatch = i) + dir_ranges.append(addr_range) dir_cntrl = DirCntrl(TCC_select_num_bits = TCC_bits) - dir_cntrl.create(options, ruby_system, system) + dir_cntrl.create(options, dir_ranges, ruby_system, system) dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units #Enough TBEs for all TCP TBEs diff --git a/configs/ruby/Garnet_standalone.py b/configs/ruby/Garnet_standalone.py index 2897e73a4..5c173ce17 100644 --- a/configs/ruby/Garnet_standalone.py +++ b/configs/ruby/Garnet_standalone.py @@ -32,7 +32,7 @@ import m5 from m5.objects import * from m5.defines import buildEnv from m5.util import addToPath -from Ruby import create_topology +from Ruby import create_topology, create_directories # # 
Declare caches used by the protocol @@ -59,7 +59,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # Therefore the l1 controller nodes must be listed before # the directory nodes and directory nodes before dma nodes, etc. l1_cntrl_nodes = [] - dir_cntrl_nodes = [] # # Must create the individual controllers before the network to ensure the @@ -101,23 +100,9 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l1_cntrl.forwardFromCache = MessageBuffer() - phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges)) - assert(phys_mem_size % options.num_dirs == 0) - mem_module_size = phys_mem_size / options.num_dirs - - for i in xrange(options.num_dirs): - dir_size = MemorySize('0B') - dir_size.value = mem_module_size - - dir_cntrl = Directory_Controller(version = i, - directory = \ - RubyDirectoryMemory(version = i, - size = dir_size), - ruby_system = ruby_system) - - exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) - dir_cntrl_nodes.append(dir_cntrl) - + dir_cntrl_nodes = create_directories(options, system.mem_ranges, + ruby_system) + for dir_cntrl in dir_cntrl_nodes: # Connect the directory controllers and the network dir_cntrl.requestToDir = MessageBuffer() dir_cntrl.forwardToDir = MessageBuffer() diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py index 1d4b6ebf6..5d9d5b2f8 100644 --- a/configs/ruby/MESI_Three_Level.py +++ b/configs/ruby/MESI_Three_Level.py @@ -33,7 +33,7 @@ import math import m5 from m5.objects import * from m5.defines import buildEnv -from Ruby import create_topology +from Ruby import create_topology, create_directories from Ruby import send_evicts # @@ -66,7 +66,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l0_cntrl_nodes = [] l1_cntrl_nodes = [] l2_cntrl_nodes = [] - dir_cntrl_nodes = [] dma_cntrl_nodes = [] assert (options.num_cpus % options.num_clusters == 0) @@ -194,31 +193,15 @@ def create_system(options, full_system, system, 
dma_ports, ruby_system): l2_cntrl.responseToL2Cache = MessageBuffer() l2_cntrl.responseToL2Cache.slave = ruby_system.network.master - phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges)) - assert(phys_mem_size % options.num_dirs == 0) - mem_module_size = phys_mem_size / options.num_dirs - # Run each of the ruby memory controllers at a ratio of the frequency of # the ruby system # clk_divider value is a fix to pass regression. ruby_system.memctrl_clk_domain = DerivedClockDomain( clk_domain = ruby_system.clk_domain, clk_divider = 3) - for i in xrange(options.num_dirs): - # - # Create the Ruby objects associated with the directory controller - # - dir_size = MemorySize('0B') - dir_size.value = mem_module_size - - dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory(version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) - - exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) - dir_cntrl_nodes.append(dir_cntrl) - + dir_cntrl_nodes = create_directories(options, system.mem_ranges, + ruby_system) + for dir_cntrl in dir_cntrl_nodes: # Connect the directory controllers and the network dir_cntrl.requestToDir = MessageBuffer() dir_cntrl.requestToDir.slave = ruby_system.network.master diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py index 4cfa54bd8..844c62af4 100644 --- a/configs/ruby/MESI_Two_Level.py +++ b/configs/ruby/MESI_Two_Level.py @@ -31,7 +31,7 @@ import math import m5 from m5.objects import * from m5.defines import buildEnv -from Ruby import create_topology +from Ruby import create_topology, create_directories from Ruby import send_evicts # @@ -57,7 +57,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # l1_cntrl_nodes = [] l2_cntrl_nodes = [] - dir_cntrl_nodes = [] dma_cntrl_nodes = [] # @@ -167,11 +166,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l2_cntrl.responseToL2Cache.slave = 
ruby_system.network.master - phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges)) - assert(phys_mem_size % options.num_dirs == 0) - mem_module_size = phys_mem_size / options.num_dirs - - # Run each of the ruby memory controllers at a ratio of the frequency of # the ruby system # clk_divider value is a fix to pass regression. @@ -179,18 +173,9 @@ def create_system(options, full_system, system, dma_ports, ruby_system): clk_domain = ruby_system.clk_domain, clk_divider = 3) - for i in xrange(options.num_dirs): - dir_size = MemorySize('0B') - dir_size.value = mem_module_size - - dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory(version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) - - exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) - dir_cntrl_nodes.append(dir_cntrl) - + dir_cntrl_nodes = create_directories(options, system.mem_ranges, + ruby_system) + for dir_cntrl in dir_cntrl_nodes: # Connect the directory controllers and the network dir_cntrl.requestToDir = MessageBuffer() dir_cntrl.requestToDir.slave = ruby_system.network.master diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py index 24b0f9716..eb881e55c 100644 --- a/configs/ruby/MI_example.py +++ b/configs/ruby/MI_example.py @@ -31,7 +31,7 @@ import math import m5 from m5.objects import * from m5.defines import buildEnv -from Ruby import create_topology +from Ruby import create_topology, create_directories from Ruby import send_evicts # @@ -55,7 +55,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # listed before the directory nodes and directory nodes before dma nodes, etc. 
# l1_cntrl_nodes = [] - dir_cntrl_nodes = [] dma_cntrl_nodes = [] # @@ -126,18 +125,9 @@ def create_system(options, full_system, system, dma_ports, ruby_system): clk_domain=ruby_system.clk_domain, clk_divider=3) - for i in xrange(options.num_dirs): - dir_size = MemorySize('0B') - dir_size.value = mem_module_size - dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) - - exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) - dir_cntrl_nodes.append(dir_cntrl) - + dir_cntrl_nodes = create_directories(options, system.mem_ranges, + ruby_system) + for dir_cntrl in dir_cntrl_nodes: # Connect the directory controllers and the network dir_cntrl.requestToDir = MessageBuffer(ordered = True) dir_cntrl.requestToDir.slave = ruby_system.network.master diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py index f1b3d792b..cdbb6f600 100644 --- a/configs/ruby/MOESI_AMD_Base.py +++ b/configs/ruby/MOESI_AMD_Base.py @@ -166,24 +166,14 @@ class L3Cntrl(L3Cache_Controller, CntrlBase): self.probeToL3 = probe_to_l3 self.respToL3 = resp_to_l3 -class DirMem(RubyDirectoryMemory, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - - phys_mem_size = AddrRange(options.mem_size).size() - mem_module_size = phys_mem_size / options.num_dirs - dir_size = MemorySize('0B') - dir_size.value = mem_module_size - self.size = dir_size - class DirCntrl(Directory_Controller, CntrlBase): - def create(self, options, ruby_system, system): + def create(self, options, dir_ranges, ruby_system, system): self.version = self.versionCount() self.response_latency = 30 - self.directory = DirMem() - self.directory.create(options, ruby_system, system) + self.addr_ranges = dir_ranges + self.directory = RubyDirectoryMemory() self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) @@ -245,10 +235,29 
@@ def create_system(options, full_system, system, dma_devices, ruby_system): # This is the base crossbar that connects the L3s, Dirs, and cpu # Cluster mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s + + if options.numa_high_bit: + numa_bit = options.numa_high_bit + else: + # if the numa_bit is not specified, set the directory bits as the + # lowest bits above the block offset bits, and the numa_bit as the + # highest of those directory bits + dir_bits = int(math.log(options.num_dirs, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + numa_bit = block_size_bits + dir_bits - 1 + for i in xrange(options.num_dirs): + dir_ranges = [] + for r in system.mem_ranges: + addr_range = m5.objects.AddrRange(r.start, size = r.size(), + intlvHighBit = numa_bit, + intlvBits = dir_bits, + intlvMatch = i) + dir_ranges.append(addr_range) + dir_cntrl = DirCntrl(TCC_select_num_bits = 0) - dir_cntrl.create(options, ruby_system, system) + dir_cntrl.create(options, dir_ranges, ruby_system, system) # Connect the Directory controller to the ruby network dir_cntrl.requestFromCores = MessageBuffer(ordered = True) diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py index a72b5b20e..cbb061d32 100644 --- a/configs/ruby/MOESI_CMP_directory.py +++ b/configs/ruby/MOESI_CMP_directory.py @@ -31,7 +31,7 @@ import math import m5 from m5.objects import * from m5.defines import buildEnv -from Ruby import create_topology +from Ruby import create_topology, create_directories from Ruby import send_evicts # @@ -57,7 +57,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # l1_cntrl_nodes = [] l2_cntrl_nodes = [] - dir_cntrl_nodes = [] dma_cntrl_nodes = [] # @@ -158,12 +157,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l2_cntrl.responseToL2Cache.slave = ruby_system.network.master l2_cntrl.triggerQueue = MessageBuffer(ordered = True) - - phys_mem_size = sum(map(lambda r: r.size(), 
system.mem_ranges)) - assert(phys_mem_size % options.num_dirs == 0) - mem_module_size = phys_mem_size / options.num_dirs - - # Run each of the ruby memory controllers at a ratio of the frequency of # the ruby system. # clk_divider value is a fix to pass regression. @@ -171,19 +164,10 @@ def create_system(options, full_system, system, dma_ports, ruby_system): clk_domain=ruby_system.clk_domain, clk_divider=3) - for i in xrange(options.num_dirs): - dir_size = MemorySize('0B') - dir_size.value = mem_module_size - - dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) - - exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) - dir_cntrl_nodes.append(dir_cntrl) + dir_cntrl_nodes = create_directories(options, system.mem_ranges, + ruby_system) + for dir_cntrl in dir_cntrl_nodes: # Connect the directory controllers and the network dir_cntrl.requestToDir = MessageBuffer() dir_cntrl.requestToDir.slave = ruby_system.network.master diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py index 7161544b7..7c9871970 100644 --- a/configs/ruby/MOESI_CMP_token.py +++ b/configs/ruby/MOESI_CMP_token.py @@ -31,7 +31,7 @@ import math import m5 from m5.objects import * from m5.defines import buildEnv -from Ruby import create_topology +from Ruby import create_topology, create_directories from Ruby import send_evicts # @@ -70,7 +70,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # l1_cntrl_nodes = [] l2_cntrl_nodes = [] - dir_cntrl_nodes = [] dma_cntrl_nodes = [] # @@ -184,10 +183,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l2_cntrl.persistentToL2Cache.slave = ruby_system.network.master - phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges)) - assert(phys_mem_size % options.num_dirs == 0) - mem_module_size = phys_mem_size / options.num_dirs - # Run each of the ruby memory 
controllers at a ratio of the frequency of # the ruby system # clk_divider value is a fix to pass regression. @@ -195,20 +190,10 @@ def create_system(options, full_system, system, dma_ports, ruby_system): clk_domain=ruby_system.clk_domain, clk_divider=3) - for i in xrange(options.num_dirs): - dir_size = MemorySize('0B') - dir_size.value = mem_module_size - - dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - l2_select_num_bits = l2_bits, - transitions_per_cycle = options.ports, - ruby_system = ruby_system) - - exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) - dir_cntrl_nodes.append(dir_cntrl) - + dir_cntrl_nodes = create_directories(options, system.mem_ranges, + ruby_system) + for dir_cntrl in dir_cntrl_nodes: + dir_cntrl.l2_select_num_bits = l2_bits # Connect the directory controllers and the network dir_cntrl.requestToDir = MessageBuffer() dir_cntrl.requestToDir.slave = ruby_system.network.master diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py index 6a1cfd70b..9f615f931 100644 --- a/configs/ruby/MOESI_hammer.py +++ b/configs/ruby/MOESI_hammer.py @@ -31,7 +31,7 @@ import math import m5 from m5.objects import * from m5.defines import buildEnv -from Ruby import create_topology +from Ruby import create_topology, create_directories from Ruby import send_evicts # @@ -65,7 +65,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): # listed before the directory nodes and directory nodes before dma nodes, etc. 
# l1_cntrl_nodes = [] - dir_cntrl_nodes = [] dma_cntrl_nodes = [] # @@ -143,10 +142,6 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l1_cntrl.responseToCache.slave = ruby_system.network.master - phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges)) - assert(phys_mem_size % options.num_dirs == 0) - mem_module_size = phys_mem_size / options.num_dirs - # # determine size and index bits for probe filter # By default, the probe filter size is configured to be twice the @@ -177,28 +172,19 @@ def create_system(options, full_system, system, dma_ports, ruby_system): clk_domain=ruby_system.clk_domain, clk_divider=3) - for i in xrange(options.num_dirs): - dir_size = MemorySize('0B') - dir_size.value = mem_module_size - + dir_cntrl_nodes = create_directories(options, system.mem_ranges, + ruby_system) + for dir_cntrl in dir_cntrl_nodes: pf = ProbeFilter(size = pf_size, assoc = 4, start_index_bit = pf_start_bit) - dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - probeFilter = pf, - probe_filter_enabled = options.pf_on, - full_bit_dir_enabled = options.dir_on, - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + dir_cntrl.probeFilter = pf + dir_cntrl.probe_filter_enabled = options.pf_on + dir_cntrl.full_bit_dir_enabled = options.dir_on if options.recycle_latency: dir_cntrl.recycle_latency = options.recycle_latency - exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) - dir_cntrl_nodes.append(dir_cntrl) - # Connect the directory controller to the network dir_cntrl.forwardFromDir = MessageBuffer() dir_cntrl.forwardFromDir.master = ruby_system.network.slave diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index 8f5edb807..71f6eef7c 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012 ARM Limited +# Copyright (c) 2012, 2017 ARM Limited # All rights reserved. 
# # The license below extends only to copyright in the software and shall @@ -84,16 +84,6 @@ def define_options(parser): def setup_memory_controllers(system, ruby, dir_cntrls, options): ruby.block_size_bytes = options.cacheline_size ruby.memory_size_bits = 48 - block_size_bits = int(math.log(options.cacheline_size, 2)) - - if options.numa_high_bit: - numa_bit = options.numa_high_bit - else: - # if the numa_bit is not specified, set the directory bits as the - # lowest bits above the block offset bits, and the numa_bit as the - # highest of those directory bits - dir_bits = int(math.log(options.num_dirs, 2)) - numa_bit = block_size_bits + dir_bits - 1 index = 0 mem_ctrls = [] @@ -104,8 +94,6 @@ def setup_memory_controllers(system, ruby, dir_cntrls, options): # for each address range as the abstract memory can handle only one # contiguous address range as of now. for dir_cntrl in dir_cntrls: - dir_cntrl.directory.numa_high_bit = numa_bit - crossbar = None if len(system.mem_ranges) > 1: crossbar = IOXBar() @@ -208,6 +196,37 @@ def create_system(options, full_system, system, piobus = None, dma_ports = []): ruby.phys_mem = SimpleMemory(range=system.mem_ranges[0], in_addr_map=False) +def create_directories(options, mem_ranges, ruby_system): + dir_cntrl_nodes = [] + if options.numa_high_bit: + numa_bit = options.numa_high_bit + else: + # if the numa_bit is not specified, set the directory bits as the + # lowest bits above the block offset bits, and the numa_bit as the + # highest of those directory bits + dir_bits = int(math.log(options.num_dirs, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + numa_bit = block_size_bits + dir_bits - 1 + + for i in xrange(options.num_dirs): + dir_ranges = [] + for r in mem_ranges: + addr_range = m5.objects.AddrRange(r.start, size = r.size(), + intlvHighBit = numa_bit, + intlvBits = dir_bits, + intlvMatch = i) + dir_ranges.append(addr_range) + + dir_cntrl = Directory_Controller() + dir_cntrl.version = i + 
dir_cntrl.directory = RubyDirectoryMemory() + dir_cntrl.ruby_system = ruby_system + dir_cntrl.addr_ranges = dir_ranges + + exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) + dir_cntrl_nodes.append(dir_cntrl) + return dir_cntrl_nodes + def send_evicts(options): # currently, 2 scenarios warrant forwarding evictions to the CPU: # 1. The O3 model must keep the LSQ coherent with the caches -- cgit v1.2.3