diff options
-rw-r--r-- | configs/common/CacheConfig.py | 6 | ||||
-rw-r--r-- | configs/common/FSConfig.py | 14 | ||||
-rw-r--r-- | configs/dram/sweep.py | 2 | ||||
-rw-r--r-- | configs/example/memcheck.py | 4 | ||||
-rw-r--r-- | configs/example/memtest.py | 4 | ||||
-rw-r--r-- | configs/example/ruby_mem_test.py | 2 | ||||
-rw-r--r-- | configs/example/se.py | 2 | ||||
-rw-r--r-- | configs/ruby/Ruby.py | 2 | ||||
-rw-r--r-- | configs/splash2/cluster.py | 10 | ||||
-rw-r--r-- | configs/splash2/run.py | 4 | ||||
-rw-r--r-- | src/cpu/BaseCPU.py | 7 | ||||
-rw-r--r-- | src/mem/XBar.py | 51 | ||||
-rw-r--r-- | tests/configs/base_config.py | 4 | ||||
-rw-r--r-- | tests/configs/memtest-filter.py | 6 | ||||
-rw-r--r-- | tests/configs/memtest.py | 4 | ||||
-rw-r--r-- | tests/configs/o3-timing-mp-ruby.py | 2 | ||||
-rw-r--r-- | tests/configs/o3-timing-ruby.py | 2 | ||||
-rw-r--r-- | tests/configs/simple-atomic-mp-ruby.py | 2 | ||||
-rw-r--r-- | tests/configs/tgen-dram-ctrl.py | 2 | ||||
-rw-r--r-- | tests/configs/tgen-simple-mem.py | 2 |
20 files changed, 84 insertions, 48 deletions
diff --git a/configs/common/CacheConfig.py b/configs/common/CacheConfig.py index f31b3d566..66fe491e1 100644 --- a/configs/common/CacheConfig.py +++ b/configs/common/CacheConfig.py @@ -65,14 +65,12 @@ def config_cache(options, system): if options.l2cache: # Provide a clock for the L2 and the L1-to-L2 bus here as they # are not connected using addTwoLevelCacheHierarchy. Use the - # same clock as the CPUs, and set the L1-to-L2 bus width to 32 - # bytes (256 bits). + # same clock as the CPUs. system.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain, size=options.l2_size, assoc=options.l2_assoc) - system.tol2bus = CoherentXBar(clk_domain = system.cpu_clk_domain, - width = 32) + system.tol2bus = L2XBar(clk_domain = system.cpu_clk_domain) system.l2.cpu_side = system.tol2bus.master system.l2.mem_side = system.membus.slave diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py index cfa6dee4d..e95fff424 100644 --- a/configs/common/FSConfig.py +++ b/configs/common/FSConfig.py @@ -50,7 +50,7 @@ class CowIdeDisk(IdeDisk): def childImage(self, ci): self.image.child.image_file = ci -class MemBus(CoherentXBar): +class MemBus(SystemXBar): badaddr_responder = BadAddr() default = Self.badaddr_responder.pio @@ -78,7 +78,7 @@ def makeLinuxAlphaSystem(mem_mode, mdesc=None, ruby=False, cmdline=None): self.tsunami = BaseTsunami() # Create the io bus to connect all device ports - self.iobus = NoncoherentXBar() + self.iobus = IOXBar() self.tsunami.attachIO(self.iobus) self.tsunami.ide.pio = self.iobus.master @@ -143,7 +143,7 @@ def makeSparcSystem(mem_mode, mdesc=None): # generic system mdesc = SysConfig() self.readfile = mdesc.script() - self.iobus = NoncoherentXBar() + self.iobus = IOXBar() self.membus = MemBus() self.bridge = Bridge(delay='50ns') self.t1000 = T1000() @@ -205,7 +205,7 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None, mdesc = SysConfig() self.readfile = mdesc.script() - self.iobus = NoncoherentXBar() + self.iobus = IOXBar() self.membus 
= MemBus() self.membus.badaddr_responder.warn_access = "warn" self.bridge = Bridge(delay='50ns') @@ -311,7 +311,7 @@ def makeLinuxMipsSystem(mem_mode, mdesc=None, cmdline=None): # generic system mdesc = SysConfig() self.readfile = mdesc.script() - self.iobus = NoncoherentXBar() + self.iobus = IOXBar() self.membus = MemBus() self.bridge = Bridge(delay='50ns') self.mem_ranges = [AddrRange('1GB')] @@ -358,7 +358,7 @@ def connectX86ClassicSystem(x86_sys, numCPUs): x86_sys.membus = MemBus() # North Bridge - x86_sys.iobus = NoncoherentXBar() + x86_sys.iobus = IOXBar() x86_sys.bridge = Bridge(delay='50ns') x86_sys.bridge.master = x86_sys.iobus.slave x86_sys.bridge.slave = x86_sys.membus.master @@ -394,7 +394,7 @@ def connectX86ClassicSystem(x86_sys, numCPUs): def connectX86RubySystem(x86_sys): # North Bridge - x86_sys.iobus = NoncoherentXBar() + x86_sys.iobus = IOXBar() # add the ide to the list of dma devices that later need to attach to # dma controllers diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py index 18a58b2da..f0b20dcc5 100644 --- a/configs/dram/sweep.py +++ b/configs/dram/sweep.py @@ -84,7 +84,7 @@ if args: # start with the system itself, using a multi-layer 1.5 GHz # crossbar, delivering 64 bytes / 5 cycles (one header cycle) # which amounts to 19.2 GByte/s per layer and thus per port -system = System(membus = NoncoherentXBar(width = 16)) +system = System(membus = IOXBar(width = 16)) system.clk_domain = SrcClockDomain(clock = '1.5GHz', voltage_domain = VoltageDomain(voltage = '1V')) diff --git a/configs/example/memcheck.py b/configs/example/memcheck.py index 4f85223d9..f0bc26e32 100644 --- a/configs/example/memcheck.py +++ b/configs/example/memcheck.py @@ -243,7 +243,7 @@ def make_cache_level(ncaches, prototypes, level, next_cache): if level != 0: # Create a crossbar and add it to the subsystem, note that # we do this even with a single element on this level - xbar = CoherentXBar(width = 32) + xbar = L2XBar(width = 32) subsys.xbar = xbar if 
next_cache: xbar.master = next_cache.cpu_side @@ -269,7 +269,7 @@ def make_cache_level(ncaches, prototypes, level, next_cache): if ntesters > 1: # Create a crossbar and add it to the subsystem - xbar = CoherentXBar(width = 32) + xbar = L2XBar(width = 32) subsys.xbar = xbar xbar.master = next_cache.cpu_side for tester, checker in zip(testers, checkers): diff --git a/configs/example/memtest.py b/configs/example/memtest.py index 6c1e657e4..9a66320d8 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -233,7 +233,7 @@ def make_cache_level(ncaches, prototypes, level, next_cache): if level != 0: # Create a crossbar and add it to the subsystem, note that # we do this even with a single element on this level - xbar = CoherentXBar(width = 32) + xbar = L2XBar() subsys.xbar = xbar if next_cache: xbar.master = next_cache.cpu_side @@ -258,7 +258,7 @@ def make_cache_level(ncaches, prototypes, level, next_cache): if ntesters > 1: # Create a crossbar and add it to the subsystem - xbar = CoherentXBar(width = 32) + xbar = L2XBar() subsys.xbar = xbar xbar.master = next_cache.cpu_side for tester in testers: diff --git a/configs/example/ruby_mem_test.py b/configs/example/ruby_mem_test.py index f5e6d2a82..e2887410f 100644 --- a/configs/example/ruby_mem_test.py +++ b/configs/example/ruby_mem_test.py @@ -106,7 +106,7 @@ cpus = [ MemTest(atomic = False, system = System(cpu = cpus, funcmem = SimpleMemory(in_addr_map = False), - funcbus = NoncoherentXBar(), + funcbus = IOXBar(), clk_domain = SrcClockDomain(clock = options.sys_clock), mem_ranges = [AddrRange(options.mem_size)]) diff --git a/configs/example/se.py b/configs/example/se.py index 3f51acdeb..a582d2976 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -265,7 +265,7 @@ if options.ruby: system.cpu[i].dtb.walker.port = ruby_port.slave else: MemClass = Simulation.setMemClass(options) - system.membus = CoherentXBar() + system.membus = SystemXBar() system.system_port = system.membus.slave 
CacheConfig.config_cache(options, system) MemConfig.config_mem(options, system) diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index e0d53fd6c..1fa969782 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -116,7 +116,7 @@ def setup_memory_controllers(system, ruby, dir_cntrls, options): crossbar = None if len(system.mem_ranges) > 1: - crossbar = NoncoherentXBar() + crossbar = IOXBar() crossbars.append(crossbar) dir_cntrl.memory = crossbar.slave diff --git a/configs/splash2/cluster.py b/configs/splash2/cluster.py index b17c8877e..9fafcb70c 100644 --- a/configs/splash2/cluster.py +++ b/configs/splash2/cluster.py @@ -171,7 +171,7 @@ if options.timing: for j in xrange(options.numclusters): clusters[j].id = j for cluster in clusters: - cluster.clusterbus = CoherentXBar(clock=busFrequency) + cluster.clusterbus = L2XBar(clock=busFrequency) all_l1buses += [cluster.clusterbus] cluster.cpus = [TimingSimpleCPU(cpu_id = i + cluster.id, clock=options.frequency) @@ -184,7 +184,7 @@ elif options.detailed: for j in xrange(options.numclusters): clusters[j].id = j for cluster in clusters: - cluster.clusterbus = CoherentXBar(clock=busFrequency) + cluster.clusterbus = L2XBar(clock=busFrequency) all_l1buses += [cluster.clusterbus] cluster.cpus = [DerivO3CPU(cpu_id = i + cluster.id, clock=options.frequency) @@ -197,7 +197,7 @@ else: for j in xrange(options.numclusters): clusters[j].id = j for cluster in clusters: - cluster.clusterbus = CoherentXBar(clock=busFrequency) + cluster.clusterbus = L2XBar(clock=busFrequency) all_l1buses += [cluster.clusterbus] cluster.cpus = [AtomicSimpleCPU(cpu_id = i + cluster.id, clock=options.frequency) @@ -211,10 +211,10 @@ else: # ---------------------- system = System(cpu = all_cpus, l1_ = all_l1s, l1bus_ = all_l1buses, physmem = SimpleMemory(), - membus = CoherentXBar(clock = busFrequency)) + membus = SystemXBar(clock = busFrequency)) system.clock = '1GHz' -system.toL2bus = CoherentXBar(clock = busFrequency) +system.toL2bus = 
L2XBar(clock = busFrequency) system.l2 = L2(size = options.l2size, assoc = 8) # ---------------------- diff --git a/configs/splash2/run.py b/configs/splash2/run.py index d542a9437..14e5f47d4 100644 --- a/configs/splash2/run.py +++ b/configs/splash2/run.py @@ -196,10 +196,10 @@ else: # Create a system, and add system wide objects # ---------------------- system = System(cpu = cpus, physmem = SimpleMemory(), - membus = CoherentXBar(clock = busFrequency)) + membus = SystemXBar(clock = busFrequency)) system.clock = '1GHz' -system.toL2bus = CoherentXBar(clock = busFrequency) +system.toL2bus = L2XBar(clock = busFrequency) system.l2 = L2(size = options.l2size, assoc = 8) # ---------------------- diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index ee6c05f46..9aa24c97b 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -47,7 +47,7 @@ from m5.defines import buildEnv from m5.params import * from m5.proxy import * -from XBar import CoherentXBar +from XBar import L2XBar from InstTracer import InstTracer from CPUTracers import ExeTracer from MemObject import MemObject @@ -285,10 +285,7 @@ class BaseCPU(MemObject): def addTwoLevelCacheHierarchy(self, ic, dc, l2c, iwc = None, dwc = None): self.addPrivateSplitL1Caches(ic, dc, iwc, dwc) - # Set a width of 32 bytes (256-bits), which is four times that - # of the default bus. The clock of the CPU is inherited by - # default. - self.toL2Bus = CoherentXBar(width = 32) + self.toL2Bus = L2XBar() self.connectCachedPorts(self.toL2Bus) self.l2cache = l2c self.toL2Bus.master = self.l2cache.cpu_side diff --git a/src/mem/XBar.py b/src/mem/XBar.py index 64910ed72..a445b5e37 100644 --- a/src/mem/XBar.py +++ b/src/mem/XBar.py @@ -66,12 +66,12 @@ class BaseXBar(MemObject): # is the latency involved once a decision is made to forward the # request. The response latency, is similar to the forward # latency, but for responses rather than requests. 
- frontend_latency = Param.Cycles(3, "Frontend latency") - forward_latency = Param.Cycles(4, "Forward latency") - response_latency = Param.Cycles(2, "Response latency") + frontend_latency = Param.Cycles("Frontend latency") + forward_latency = Param.Cycles("Forward latency") + response_latency = Param.Cycles("Response latency") # Width governing the throughput of the crossbar - width = Param.Unsigned(8, "Datapath width per port (bytes)") + width = Param.Unsigned("Datapath width per port (bytes)") # The default port can be left unconnected, or be used to connect # a default slave port @@ -95,7 +95,7 @@ class CoherentXBar(BaseXBar): # The coherent crossbar additionally has snoop responses that are # forwarded after a specific latency. - snoop_response_latency = Param.Cycles(4, "Snoop response latency") + snoop_response_latency = Param.Cycles("Snoop response latency") # An optional snoop filter snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter") @@ -111,3 +111,44 @@ class SnoopFilter(SimObject): lookup_latency = Param.Cycles(1, "Lookup latency") system = Param.System(Parent.any, "System that the crossbar belongs to.") + +# We use a coherent crossbar to connect multiple masters to the L2 +# caches. Normally this crossbar would be part of the cache itself. +class L2XBar(CoherentXBar): + # 256-bit crossbar by default + width = 32 + + # Assume that most of this is covered by the cache latencies, with + # no more than a single pipeline stage for any packet. + frontend_latency = 1 + forward_latency = 0 + response_latency = 1 + snoop_response_latency = 1 + +# One of the key coherent crossbar instances is the system +# interconnect, tying together the CPU clusters, GPUs, and any I/O +# coherent masters, and DRAM controllers. +class SystemXBar(CoherentXBar): + # 128-bit crossbar by default + width = 16 + + # A handful of pipeline stages for each portion of the latency + # contributions. 
+ frontend_latency = 3 + forward_latency = 4 + response_latency = 2 + snoop_response_latency = 4 + +# In addition to the system interconnect, we typically also have one +# or more on-chip I/O crossbars. Note that at some point we might want +# to also define an off-chip I/O crossbar such as PCIe. +class IOXBar(NoncoherentXBar): + # 128-bit crossbar by default + width = 16 + + # Assume a simpler datapath than a coherent crossbar, incurring + # fewer pipeline stages for decision making and forwarding of + # requests. + frontend_latency = 2 + forward_latency = 1 + response_latency = 2 diff --git a/tests/configs/base_config.py b/tests/configs/base_config.py index 5637ca3f5..c440d48d9 100644 --- a/tests/configs/base_config.py +++ b/tests/configs/base_config.py @@ -104,7 +104,7 @@ class BaseSystem(object): Returns: A bus that CPUs should use to connect to the shared cache. """ - system.toL2Bus = CoherentXBar(clk_domain=system.cpu_clk_domain) + system.toL2Bus = L2XBar(clk_domain=system.cpu_clk_domain) system.l2c = L2Cache(clk_domain=system.cpu_clk_domain, size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master @@ -186,7 +186,7 @@ class BaseSESystem(BaseSystem): def create_system(self): system = System(physmem = self.mem_class(), - membus = CoherentXBar(), + membus = SystemXBar(), mem_mode = self.mem_mode) system.system_port = system.membus.slave system.physmem.port = system.membus.master diff --git a/tests/configs/memtest-filter.py b/tests/configs/memtest-filter.py index 42dd05639..34ac75f00 100644 --- a/tests/configs/memtest-filter.py +++ b/tests/configs/memtest-filter.py @@ -38,7 +38,7 @@ cpus = [ MemTest() for i in xrange(nb_cores) ] # system simulated system = System(cpu = cpus, physmem = SimpleMemory(), - membus = CoherentXBar(width=16, snoop_filter = SnoopFilter())) + membus = SystemXBar(width=16, snoop_filter = SnoopFilter())) # Dummy voltage domain for all our clock domains system.voltage_domain = VoltageDomain() system.clk_domain = SrcClockDomain(clock = 
'1GHz', @@ -49,8 +49,8 @@ system.clk_domain = SrcClockDomain(clock = '1GHz', system.cpu_clk_domain = SrcClockDomain(clock = '2GHz', voltage_domain = system.voltage_domain) -system.toL2Bus = CoherentXBar(clk_domain = system.cpu_clk_domain, width=16, - snoop_filter = SnoopFilter()) +system.toL2Bus = L2XBar(clk_domain = system.cpu_clk_domain, + snoop_filter = SnoopFilter()) system.l2c = L2Cache(clk_domain = system.cpu_clk_domain, size='64kB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py index 42f50ce3b..5bbfeb774 100644 --- a/tests/configs/memtest.py +++ b/tests/configs/memtest.py @@ -38,7 +38,7 @@ cpus = [ MemTest() for i in xrange(nb_cores) ] # system simulated system = System(cpu = cpus, physmem = SimpleMemory(), - membus = CoherentXBar(width=16)) + membus = SystemXBar()) # Dummy voltage domain for all our clock domains system.voltage_domain = VoltageDomain() system.clk_domain = SrcClockDomain(clock = '1GHz', @@ -49,7 +49,7 @@ system.clk_domain = SrcClockDomain(clock = '1GHz', system.cpu_clk_domain = SrcClockDomain(clock = '2GHz', voltage_domain = system.voltage_domain) -system.toL2Bus = CoherentXBar(clk_domain = system.cpu_clk_domain, width=16) +system.toL2Bus = L2XBar(clk_domain = system.cpu_clk_domain) system.l2c = L2Cache(clk_domain = system.cpu_clk_domain, size='64kB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master diff --git a/tests/configs/o3-timing-mp-ruby.py b/tests/configs/o3-timing-mp-ruby.py index 3fea4ed71..fb2d56fd1 100644 --- a/tests/configs/o3-timing-mp-ruby.py +++ b/tests/configs/o3-timing-mp-ruby.py @@ -38,7 +38,7 @@ import ruby_config ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", nb_cores) # system simulated -system = System(cpu = cpus, physmem = ruby_memory, membus = CoherentXBar(), +system = System(cpu = cpus, physmem = ruby_memory, membus = SystemXBar(), mem_mode = "timing", clk_domain = SrcClockDomain(clock = '1GHz')) diff --git 
a/tests/configs/o3-timing-ruby.py b/tests/configs/o3-timing-ruby.py index 68a07e702..c47d9f355 100644 --- a/tests/configs/o3-timing-ruby.py +++ b/tests/configs/o3-timing-ruby.py @@ -39,7 +39,7 @@ cpu = DerivO3CPU(cpu_id=0) system = System(cpu = cpu, physmem = ruby_memory, - membus = CoherentXBar(), + membus = SystemXBar(), mem_mode = "timing", clk_domain = SrcClockDomain(clock = '1GHz')) diff --git a/tests/configs/simple-atomic-mp-ruby.py b/tests/configs/simple-atomic-mp-ruby.py index 321cb977f..bdda6d005 100644 --- a/tests/configs/simple-atomic-mp-ruby.py +++ b/tests/configs/simple-atomic-mp-ruby.py @@ -38,7 +38,7 @@ import ruby_config ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", nb_cores) # system simulated -system = System(cpu = cpus, physmem = ruby_memory, membus = CoherentXBar(), +system = System(cpu = cpus, physmem = ruby_memory, membus = SystemXBar(), clk_domain = SrcClockDomain(clock = '1GHz')) # Create a seperate clock domain for components that should run at diff --git a/tests/configs/tgen-dram-ctrl.py b/tests/configs/tgen-dram-ctrl.py index d170ac077..cd6721e6d 100644 --- a/tests/configs/tgen-dram-ctrl.py +++ b/tests/configs/tgen-dram-ctrl.py @@ -49,7 +49,7 @@ cpu = TrafficGen(config_file = "tests/quick/se/70.tgen/tgen-dram-ctrl.cfg") # system simulated system = System(cpu = cpu, physmem = DDR3_1600_x64(), - membus = NoncoherentXBar(width = 16), + membus = IOXBar(width = 16), clk_domain = SrcClockDomain(clock = '1GHz', voltage_domain = VoltageDomain())) diff --git a/tests/configs/tgen-simple-mem.py b/tests/configs/tgen-simple-mem.py index be700ac7a..edb2f9fcd 100644 --- a/tests/configs/tgen-simple-mem.py +++ b/tests/configs/tgen-simple-mem.py @@ -49,7 +49,7 @@ cpu = TrafficGen(config_file = "tests/quick/se/70.tgen/tgen-simple-mem.cfg") # system simulated system = System(cpu = cpu, physmem = SimpleMemory(), - membus = NoncoherentXBar(width = 16), + membus = IOXBar(width = 16), clk_domain = SrcClockDomain(clock = '1GHz', 
voltage_domain = VoltageDomain())) |