diff options
author | Gabe Black <gblack@eecs.umich.edu> | 2006-11-16 14:41:56 -0500 |
---|---|---|
committer | Gabe Black <gblack@eecs.umich.edu> | 2006-11-16 14:41:56 -0500 |
commit | 14ebaa1eccff4032d59147783e98e07b81b5f1ae (patch) | |
tree | 4c738d02f4876cb394d0f9510d08380a6f5384c1 | |
parent | ac2c7967f69e3ffd29a1ed04a15838073dc060de (diff) | |
parent | dbdf2f14ae6b586efd31b73aa4548a38ecee263f (diff) | |
download | gem5-14ebaa1eccff4032d59147783e98e07b81b5f1ae.tar.xz |
Merge zizzer.eecs.umich.edu:/bk/newmem/
into zower.eecs.umich.edu:/home/gblack/m5/newmemmemops
--HG--
extra : convert_revision : c49b760eac758dbde30867cb638fcb3b790f4721
-rw-r--r-- | configs/common/Caches.py | 7 | ||||
-rw-r--r-- | configs/example/fs.py | 14 | ||||
-rw-r--r-- | configs/splash2/cluster.py | 303 | ||||
-rw-r--r-- | configs/splash2/run.py | 2 | ||||
-rw-r--r-- | src/cpu/base.cc | 12 | ||||
-rw-r--r-- | src/cpu/base.hh | 3 | ||||
-rw-r--r-- | src/cpu/memtest/memtest.cc | 10 | ||||
-rw-r--r-- | src/cpu/memtest/memtest.hh | 4 | ||||
-rw-r--r-- | src/cpu/o3/alpha/cpu_builder.cc | 2 | ||||
-rw-r--r-- | src/cpu/o3/fetch.hh | 2 | ||||
-rw-r--r-- | src/cpu/o3/fetch_impl.hh | 9 | ||||
-rw-r--r-- | src/cpu/o3/lsq.hh | 2 | ||||
-rw-r--r-- | src/cpu/o3/lsq_impl.hh | 10 | ||||
-rw-r--r-- | src/cpu/o3/mips/cpu_builder.cc | 2 | ||||
-rw-r--r-- | src/cpu/simple/atomic.cc | 15 | ||||
-rw-r--r-- | src/cpu/simple/atomic.hh | 2 | ||||
-rw-r--r-- | src/cpu/simple/timing.cc | 33 | ||||
-rw-r--r-- | src/cpu/simple/timing.hh | 4 | ||||
-rw-r--r-- | src/mem/bridge.cc | 12 | ||||
-rw-r--r-- | src/mem/bus.cc | 116 | ||||
-rw-r--r-- | src/mem/bus.hh | 6 | ||||
-rw-r--r-- | src/mem/cache/base_cache.cc | 14 | ||||
-rw-r--r-- | src/mem/cache/base_cache.hh | 7 | ||||
-rw-r--r-- | src/mem/packet.hh | 2 | ||||
-rw-r--r-- | src/python/m5/objects/BaseCPU.py | 1 |
25 files changed, 523 insertions, 71 deletions
diff --git a/configs/common/Caches.py b/configs/common/Caches.py index d86fba246..4692ef537 100644 --- a/configs/common/Caches.py +++ b/configs/common/Caches.py @@ -37,3 +37,10 @@ class L1Cache(BaseCache): tgts_per_mshr = 5 protocol = CoherenceProtocol(protocol='moesi') +class L2Cache(BaseCache): + assoc = 8 + block_size = 64 + latency = 10 + mshrs = 20 + tgts_per_mshr = 12 + diff --git a/configs/example/fs.py b/configs/example/fs.py index a9f1d579a..a70a60b97 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -47,6 +47,7 @@ config_root = os.path.dirname(config_path) parser = optparse.OptionParser() # Benchmark options +parser.add_option("--l2cache", action="store_true") parser.add_option("--dual", action="store_true", help="Simulate two systems attached with an ethernet link") parser.add_option("-b", "--benchmark", action="store", type="string", @@ -93,12 +94,23 @@ else: test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0]) np = options.num_cpus + +if options.l2cache: + test_sys.l2 = L2Cache(size = '2MB') + test_sys.tol2bus = Bus() + test_sys.l2.cpu_side = test_sys.tol2bus.port + test_sys.l2.mem_side = test_sys.membus.port + test_sys.cpu = [TestCPUClass(cpu_id=i) for i in xrange(np)] for i in xrange(np): if options.caches: test_sys.cpu[i].addPrivateSplitL1Caches(L1Cache(size = '32kB'), L1Cache(size = '64kB')) - test_sys.cpu[i].connectMemPorts(test_sys.membus) + + if options.l2cache: + test_sys.cpu[i].connectMemPorts(test_sys.tol2bus) + else: + test_sys.cpu[i].connectMemPorts(test_sys.membus) if len(bm) == 2: drive_sys = makeLinuxAlphaSystem(drive_mem_mode, bm[1]) diff --git a/configs/splash2/cluster.py b/configs/splash2/cluster.py new file mode 100644 index 000000000..799b85e6c --- /dev/null +++ b/configs/splash2/cluster.py @@ -0,0 +1,303 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + +# Simple test script +# +# "m5 test.py" + +import m5 +from m5.objects import * +import os, optparse, sys +m5.AddToPath('../common') + +# -------------------- +# Define Command Line Options +# ==================== + +parser = optparse.OptionParser() + +parser.add_option("-d", "--detailed", action="store_true") +parser.add_option("-t", "--timing", action="store_true") +parser.add_option("-m", "--maxtick", type="int") +parser.add_option("-c", "--numclusters", + help="Number of clusters", type="int") +parser.add_option("-n", "--numcpus", + help="Number of cpus in total", type="int") +parser.add_option("-f", "--frequency", + default = "1GHz", + help="Frequency of each CPU") +parser.add_option("-p", "--protocol", + default="moesi", + help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)") +parser.add_option("--l1size", + default = "32kB") +parser.add_option("--l1latency", + default = 1) +parser.add_option("--l2size", + default = "256kB") +parser.add_option("--l2latency", + default = 10) +parser.add_option("--rootdir", + help="ROot directory of Splash2", + default="/dist/splash2/codes/") +parser.add_option("-b", "--benchmark", + help="Splash 2 benchmark to run") + +(options, args) = parser.parse_args() + +if args: + print "Error: script doesn't take any positional arguments" + sys.exit(1) + +# -------------------- +# Define Splash2 Benchmarks +# ==================== +class Cholesky(LiveProcess): + executable = options.rootdir + '/kernels/cholesky/CHOLESKY' + cmd = 'CHOLESKY -p' + str(options.numcpus) + ' '\ + + options.rootdir + '/kernels/cholesky/inputs/tk23.O' + +class FFT(LiveProcess): + executable = options.rootdir + 'kernels/fft/FFT' + cmd = 'FFT -p' + str(options.numcpus) + ' -m18' + +class LU_contig(LiveProcess): + executable = options.rootdir + 'kernels/lu/contiguous_blocks/LU' + cmd = 'LU -p' + str(options.numcpus) + +class LU_noncontig(LiveProcess): + executable = options.rootdir + 'kernels/lu/non_contiguous_blocks/LU' + cmd = 'LU -p' + str(options.numcpus) + +class Radix(LiveProcess): + executable = options.rootdir + 'kernels/radix/RADIX' + cmd = 'RADIX -n524288 -p' + str(options.numcpus) + +class Barnes(LiveProcess): + executable = options.rootdir + 'apps/barnes/BARNES' + cmd = 'BARNES' + input = options.rootdir + 'apps/barnes/input.p' + str(options.numcpus) + +class FMM(LiveProcess): + executable = options.rootdir + 'apps/fmm/FMM' + cmd = 'FMM' + input = options.rootdir + 'apps/fmm/inputs/input.2048.p' + str(options.numcpus) + +class Ocean_contig(LiveProcess): + executable = options.rootdir + 'apps/ocean/contiguous_partitions/OCEAN' + cmd = 'OCEAN -p' + str(options.numcpus) + +class Ocean_noncontig(LiveProcess): + executable = options.rootdir + 'apps/ocean/non_contiguous_partitions/OCEAN' + cmd = 'OCEAN -p' + str(options.numcpus) + +class Raytrace(LiveProcess): + executable = options.rootdir + 'apps/raytrace/RAYTRACE' + cmd = 'RAYTRACE -p' + str(options.numcpus) + ' ' \ + + options.rootdir + 'apps/raytrace/inputs/teapot.env' + +class Water_nsquared(LiveProcess): + executable = options.rootdir + 'apps/water-nsquared/WATER-NSQUARED' + cmd = 'WATER-NSQUARED' + input = options.rootdir + 'apps/water-nsquared/input.p' + str(options.numcpus) + +class Water_spatial(LiveProcess): + executable = options.rootdir + 'apps/water-spatial/WATER-SPATIAL' + cmd = 'WATER-SPATIAL' + input = options.rootdir + 'apps/water-spatial/input.p' + str(options.numcpus) + + +# -------------------- +# Base L1 Cache Definition +# ==================== + +class L1(BaseCache): + latency = options.l1latency + block_size = 64 + mshrs = 12 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol=options.protocol) + +# ---------------------- +# Base L2 Cache Definition +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = options.l2latency + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +# ---------------------- +# Define the clusters with their cpus +# ---------------------- +class Cluster: + pass + +cpusPerCluster = options.numcpus/options.numclusters + +busFrequency = Frequency(options.frequency) +busFrequency *= cpusPerCluster + +all_cpus = [] +all_l1s = [] +all_l1buses = [] +if options.timing: + clusters = [ Cluster() for i in xrange(options.numclusters)] + for j in xrange(options.numclusters): + clusters[j].id = j + for cluster in clusters: + cluster.clusterbus = Bus(clock=busFrequency) + all_l1buses += [cluster.clusterbus] + cluster.cpus = [TimingSimpleCPU(cpu_id = i + cluster.id, + clock=options.frequency) + for i in xrange(cpusPerCluster)] + all_cpus += cluster.cpus + cluster.l1 = L1(size=options.l1size, assoc = 4) + all_l1s += [cluster.l1] +elif options.detailed: + clusters = [ Cluster() for i in xrange(options.numclusters)] + for j in xrange(options.numclusters): + clusters[j].id = j + for cluster in clusters: + cluster.clusterbus = Bus(clock=busFrequency) + all_l1buses += [cluster.clusterbus] + cluster.cpus = [DerivO3CPU(cpu_id = i + cluster.id, + clock=options.frequency) + for i in xrange(cpusPerCluster)] + all_cpus += cluster.cpus + cluster.l1 = L1(size=options.l1size, assoc = 4) + all_l1s += [cluster.l1] +else: + clusters = [ Cluster() for i in xrange(options.numclusters)] + for j in xrange(options.numclusters): + clusters[j].id = j + for cluster in clusters: + cluster.clusterbus = Bus(clock=busFrequency) + all_l1buses += [cluster.clusterbus] + cluster.cpus = [AtomicSimpleCPU(cpu_id = i + cluster.id, + clock=options.frequency) + for i in xrange(cpusPerCluster)] + all_cpus += cluster.cpus + cluster.l1 = L1(size=options.l1size, assoc = 4) + all_l1s += [cluster.l1] + +# ---------------------- +# Create a system, and add system wide objects +# ---------------------- +system = System(cpu = all_cpus, l1_ = all_l1s, l1bus_ = all_l1buses, physmem = PhysicalMemory(), + membus = Bus(clock = busFrequency)) + +system.toL2bus = Bus(clock = busFrequency) +system.l2 = L2(size = options.l2size, assoc = 8) + +# ---------------------- +# Connect the L2 cache and memory together +# ---------------------- + +system.physmem.port = system.membus.port +system.l2.cpu_side = system.toL2bus.port +system.l2.mem_side = system.membus.port + +# ---------------------- +# Connect the L2 cache and clusters together +# ---------------------- +for cluster in clusters: + cluster.l1.cpu_side = cluster.clusterbus.port + cluster.l1.mem_side = system.toL2bus.port + for cpu in cluster.cpus: + cpu.icache_port = cluster.clusterbus.port + cpu.dcache_port = cluster.clusterbus.port + cpu.mem = cluster.l1 + +# ---------------------- +# Define the root +# ---------------------- + +root = Root(system = system) + +# -------------------- +# Pick the correct Splash2 Benchmarks +# ==================== +if options.benchmark == 'Cholesky': + root.workload = Cholesky() +elif options.benchmark == 'FFT': + root.workload = FFT() +elif options.benchmark == 'LUContig': + root.workload = LU_contig() +elif options.benchmark == 'LUNoncontig': + root.workload = LU_noncontig() +elif options.benchmark == 'Radix': + root.workload = Radix() +elif options.benchmark == 'Barnes': + root.workload = Barnes() +elif options.benchmark == 'FMM': + root.workload = FMM() +elif options.benchmark == 'OceanContig': + root.workload = Ocean_contig() +elif options.benchmark == 'OceanNoncontig': + root.workload = Ocean_noncontig() +elif options.benchmark == 'Raytrace': + root.workload = Raytrace() +elif options.benchmark == 'WaterNSquared': + root.workload = Water_nsquared() +elif options.benchmark == 'WaterSpatial': + root.workload = Water_spatial() +else: + panic("The --benchmark environment variable was set to something" \ + +" improper.\nUse Cholesky, FFT, LUContig, LUNoncontig, Radix" \ + +", Barnes, FMM, OceanContig,\nOceanNoncontig, Raytrace," \ + +" WaterNSquared, or WaterSpatial\n") + +# -------------------- +# Assign the workload to the cpus +# ==================== + +for cluster in clusters: + for cpu in cluster.cpus: + cpu.workload = root.workload + +# ---------------------- +# Run the simulation +# ---------------------- + +if options.timing or options.detailed: + root.system.mem_mode = 'timing' + +# instantiate configuration +m5.instantiate(root) + +# simulate until program terminates +if options.maxtick: + exit_event = m5.simulate(options.maxtick) +else: + exit_event = m5.simulate(m5.MaxTick) + +print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause() + diff --git a/configs/splash2/run.py b/configs/splash2/run.py index 7d56cb830..b162e0cc7 100644 --- a/configs/splash2/run.py +++ b/configs/splash2/run.py @@ -262,7 +262,7 @@ m5.instantiate(root) if options.maxtick: exit_event = m5.simulate(options.maxtick) else: - exit_event = m5.simulate(1000000000000) + exit_event = m5.simulate(m5.MaxTick) print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause() diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 4c243a2e9..7cbbb0b96 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -97,11 +97,13 @@ CPUProgressEvent::description() #if FULL_SYSTEM BaseCPU::BaseCPU(Params *p) : MemObject(p->name), clock(p->clock), checkInterrupts(true), - params(p), number_of_threads(p->numberOfThreads), system(p->system) + params(p), number_of_threads(p->numberOfThreads), system(p->system), + phase(p->phase) #else BaseCPU::BaseCPU(Params *p) : MemObject(p->name), clock(p->clock), params(p), - number_of_threads(p->numberOfThreads), system(p->system) + number_of_threads(p->numberOfThreads), system(p->system), + phase(p->phase) #endif { // currentTick = curTick; @@ -257,8 +259,9 @@ BaseCPU::regStats() Tick BaseCPU::nextCycle() { - Tick next_tick = curTick + clock - 1; + Tick next_tick = curTick - phase + clock - 1; next_tick -= (next_tick % clock); + next_tick += phase; return next_tick; } @@ -266,11 +269,12 @@ Tick BaseCPU::nextCycle(Tick begin_tick) { Tick next_tick = begin_tick; + next_tick -= (next_tick % clock); + next_tick += phase; while (next_tick < curTick) next_tick += clock; - next_tick -= (next_tick % clock); assert(next_tick >= curTick); return next_tick; } diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 788f77e3a..1d9b6a93b 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -153,6 +153,7 @@ class BaseCPU : public MemObject Tick functionTraceStart; System *system; int cpu_id; + Tick phase; #if FULL_SYSTEM Tick profile; @@ -209,6 +210,8 @@ class BaseCPU : public MemObject System *system; + Tick phase; + #if FULL_SYSTEM /** * Serialize this object to the given output stream. diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 91e073cf0..180f41541 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -81,8 +81,13 @@ MemTest::CpuPort::recvFunctional(PacketPtr pkt) void MemTest::CpuPort::recvStatusChange(Status status) { - if (status == RangeChange) + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } return; + } panic("MemTest doesn't expect recvStatusChange callback!"); } @@ -145,6 +150,9 @@ MemTest::MemTest(const string &name, // thread = new SimpleThread(NULL, 0, NULL, 0, mainMem); curTick = 0; + cachePort.snoopRangeSent = false; + funcPort.snoopRangeSent = true; + // Needs to be masked off once we know the block size. traceBlockAddr = _traceAddr; baseAddr1 = 0x100000; diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index 2694efd39..7bf34d827 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -100,6 +100,8 @@ class MemTest : public MemObject : Port(_name, _memtest), memtest(_memtest) { } + bool snoopRangeSent; + protected: virtual bool recvTiming(PacketPtr pkt); @@ -120,6 +122,8 @@ class MemTest : public MemObject CpuPort cachePort; CpuPort funcPort; + bool snoopRangeSent; + class MemTestSenderState : public Packet::SenderState { public: diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 09ccc7f65..5a375a4b8 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -48,6 +48,7 @@ class DerivO3CPU : public AlphaO3CPU<AlphaSimpleImpl> BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param<int> clock; + Param<int> phase; Param<int> numThreads; Param<int> activity; @@ -158,6 +159,7 @@ END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), + INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(numThreads, "number of HW thread contexts"), INIT_PARAM_DFLT(activity, "Initial activity count", 0), diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index cc9a8abf5..04016347a 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -83,6 +83,8 @@ class DefaultFetch : Port(_fetch->name() + "-iport"), fetch(_fetch) { } + bool snoopRangeSent; + protected: /** Atomic version of receive. Panics. */ virtual Tick recvAtomic(PacketPtr pkt); diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 25faa407e..63d22b293 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -70,8 +70,13 @@ template<class Impl> void DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status) { - if (status == RangeChange) + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } return; + } panic("DefaultFetch doesn't expect recvStatusChange callback!"); } @@ -287,6 +292,8 @@ DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr) // Name is finally available, so create the port. icachePort = new IcachePort(this); + icachePort->snoopRangeSent = false; + #if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(icachePort); diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 6b12d75b4..7559a36d5 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -298,6 +298,8 @@ class LSQ { : lsq(_lsq) { } + bool snoopRangeSent; + protected: /** Atomic version of receive. Panics. */ virtual Tick recvAtomic(PacketPtr pkt); diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 5e7945c1c..6758e51c8 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -53,9 +53,13 @@ template <class Impl> void LSQ<Impl>::DcachePort::recvStatusChange(Status status) { - if (status == RangeChange) + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } return; - + } panic("O3CPU doesn't expect recvStatusChange callback!"); } @@ -97,6 +101,8 @@ LSQ<Impl>::LSQ(Params *params) { DPRINTF(LSQ, "Creating LSQ object.\n"); + dcachePort.snoopRangeSent = false; + //**********************************************/ //************ Handle SMT Parameters ***********/ //**********************************************/ diff --git a/src/cpu/o3/mips/cpu_builder.cc b/src/cpu/o3/mips/cpu_builder.cc index ee9f2b48d..66741aee9 100644 --- a/src/cpu/o3/mips/cpu_builder.cc +++ b/src/cpu/o3/mips/cpu_builder.cc @@ -49,6 +49,7 @@ class DerivO3CPU : public MipsO3CPU<MipsSimpleImpl> BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param<int> clock; +Param<int> phase; Param<int> numThreads; Param<int> activity; @@ -146,6 +147,7 @@ END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), + INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(numThreads, "number of HW thread contexts"), INIT_PARAM_DFLT(activity, "Initial activity count", 0), diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 58dc1fe5f..133b5500b 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -94,7 +94,7 @@ Tick AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt) { //Snooping a coherence request, just return - return curTick; + return 0; } void @@ -107,8 +107,13 @@ AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt) void AtomicSimpleCPU::CpuPort::recvStatusChange(Status status) { - if (status == RangeChange) + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } return; + } panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!"); } @@ -127,6 +132,9 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p) { _status = Idle; + icachePort.snoopRangeSent = false; + dcachePort.snoopRangeSent = false; + ifetch_req = new Request(); ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); @@ -512,6 +520,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) #endif // FULL_SYSTEM Param<int> clock; + Param<int> phase; Param<bool> defer_registration; Param<int> width; @@ -547,6 +556,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) #endif // FULL_SYSTEM INIT_PARAM(clock, "clock speed"), + INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(width, "cpu width"), INIT_PARAM(function_trace, "Enable function trace"), @@ -567,6 +577,7 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU) params->max_loads_all_threads = max_loads_all_threads; params->progress_interval = progress_interval; params->deferRegistration = defer_registration; + params->phase = phase; params->clock = clock; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 166a18127..0df6fe079 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -90,6 +90,8 @@ class AtomicSimpleCPU : public BaseSimpleCPU : Port(_name, _cpu), cpu(_cpu) { } + bool snoopRangeSent; + protected: virtual bool recvTiming(PacketPtr pkt); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index db2c940c0..3648f7613 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -82,8 +82,13 @@ TimingSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt) void TimingSimpleCPU::CpuPort::recvStatusChange(Status status) { - if (status == RangeChange) + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } return; + } panic("TimingSimpleCPU doesn't expect recvStatusChange callback!"); } @@ -101,6 +106,10 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) cpu_id(p->cpu_id) { _status = Idle; + + icachePort.snoopRangeSent = false; + dcachePort.snoopRangeSent = false; + ifetch_pkt = dcache_pkt = NULL; drainEvent = NULL; fetchEvent = NULL; @@ -160,7 +169,7 @@ TimingSimpleCPU::resume() fetchEvent = new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false); - fetchEvent->schedule(curTick); + fetchEvent->schedule(nextCycle()); } changeState(SimObject::Running); @@ -232,7 +241,7 @@ TimingSimpleCPU::activateContext(int thread_num, int delay) // kick things off by initiating the fetch of the next instruction fetchEvent = new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false); - fetchEvent->schedule(curTick + cycles(delay)); + fetchEvent->schedule(nextCycle(curTick + cycles(delay))); } @@ -281,6 +290,8 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) // memory system takes ownership of packet dcache_pkt = NULL; } + } else { + delete req; } // This will need a new way to tell if it has a dcache attached. @@ -366,6 +377,8 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) dcache_pkt = NULL; } } + } else { + delete req; } // This will need a new way to tell if it's hooked up to a cache or not. @@ -448,6 +461,8 @@ TimingSimpleCPU::fetch() ifetch_pkt = NULL; } } else { + delete ifetch_req; + delete ifetch_pkt; // fetch fault: advance directly to next instruction (fault handler) advanceInst(fault); } @@ -481,13 +496,13 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) _status = Running; - delete pkt->req; - delete pkt; - numCycles += curTick - previousTick; previousTick = curTick; if (getState() == SimObject::Draining) { + delete pkt->req; + delete pkt; + completeDrain(); return; } @@ -519,6 +534,9 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) postExecute(); advanceInst(fault); } + + delete pkt->req; + delete pkt; } void @@ -674,6 +692,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) #endif // FULL_SYSTEM Param<int> clock; + Param<int> phase; Param<bool> defer_registration; Param<int> width; @@ -709,6 +728,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) #endif // FULL_SYSTEM INIT_PARAM(clock, "clock speed"), + INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(width, "cpu width"), INIT_PARAM(function_trace, "Enable function trace"), @@ -730,6 +750,7 @@ CREATE_SIM_OBJECT(TimingSimpleCPU) params->progress_interval = progress_interval; params->deferRegistration = defer_registration; params->clock = clock; + params->phase = phase; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; params->system = system; diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 408fa315e..fe5d03666 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -82,6 +82,8 @@ class TimingSimpleCPU : public BaseSimpleCPU : Port(_name, _cpu), cpu(_cpu), lat(_lat) { } + bool snoopRangeSent; + protected: virtual Tick recvAtomic(PacketPtr pkt); @@ -166,8 +168,6 @@ class TimingSimpleCPU : public BaseSimpleCPU PacketPtr ifetch_pkt; PacketPtr dcache_pkt; - - int cpu_id; Tick previousTick; diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 38dcfd2e8..b787f79ca 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -91,10 +91,16 @@ Bridge::init() bool Bridge::BridgePort::recvTiming(PacketPtr pkt) { - DPRINTF(BusBridge, "recvTiming: src %d dest %d addr 0x%x\n", - pkt->getSrc(), pkt->getDest(), pkt->getAddr()); + if (pkt->flags & SNOOP_COMMIT) { + DPRINTF(BusBridge, "recvTiming: src %d dest %d addr 0x%x\n", + pkt->getSrc(), pkt->getDest(), pkt->getAddr()); - return otherPort->queueForSendTiming(pkt); + return otherPort->queueForSendTiming(pkt); + } + else { + // Else it's just a snoop, properly return if we are blocking + return !queueFull(); + } } diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 8ea67a0e4..6b5b63f50 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -160,11 +160,12 @@ Bus::recvTiming(PacketPtr pkt) short dest = pkt->getDest(); if (dest == Packet::Broadcast) { - if (timingSnoop(pkt)) { + port = findPort(pkt->getAddr(), pkt->getSrc()); + if (timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()])) { bool success; pkt->flags |= SNOOP_COMMIT; - success = timingSnoop(pkt); + success = timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); assert(success); if (pkt->flags & SATISFIED) { @@ -177,7 +178,6 @@ Bus::recvTiming(PacketPtr pkt) occupyBus(pkt); return true; } - port = findPort(pkt->getAddr(), pkt->getSrc()); } else { //Snoop didn't succeed DPRINTF(Bus, "Adding a retry to RETRY list %i\n", pktPort); @@ -192,22 +192,28 @@ Bus::recvTiming(PacketPtr pkt) occupyBus(pkt); - if (port->sendTiming(pkt)) { - // Packet was successfully sent. Return true. - // Also take care of retries - if (inRetry) { - DPRINTF(Bus, "Remove retry from list %i\n", retryList.front()); - retryList.front()->onRetryList(false); - retryList.pop_front(); - inRetry = false; + if (port) { + if (port->sendTiming(pkt)) { + // Packet was successfully sent. Return true. + // Also take care of retries + if (inRetry) { + DPRINTF(Bus, "Remove retry from list %i\n", retryList.front()); + retryList.front()->onRetryList(false); + retryList.pop_front(); + inRetry = false; + } + return true; } + + // Packet not successfully sent. Leave or put it on the retry list. + DPRINTF(Bus, "Adding a retry to RETRY list %i\n", pktPort); + addToRetryList(pktPort); + return false; + } + else { + //Forwarding up from responder, just return true; return true; } - - // Packet not successfully sent. Leave or put it on the retry list. - DPRINTF(Bus, "Adding a retry to RETRY list %i\n", pktPort); - addToRetryList(pktPort); - return false; } void @@ -290,7 +296,10 @@ Bus::findPort(Addr addr, int id) // we shouldn't be sending this back to where it came from - assert(dest_id != id); + // only on a functional access and then we should terminate + // the cyclical call. + if (dest_id == id) + return 0; return interfaces[dest_id]; } @@ -307,7 +316,18 @@ Bus::findSnoopPorts(Addr addr, int id) if (portSnoopList[i].range == addr && portSnoopList[i].portId != id) { //Careful to not overlap ranges //or snoop will be called more than once on the port - ports.push_back(portSnoopList[i].portId); + + //@todo Fix this hack because ranges are overlapping + //need to make sure we dont't create overlapping ranges + bool hack_overlap = false; + int size = ports.size(); + for (int j=0; j < size; j++) { + if (ports[j] == portSnoopList[i].portId) + hack_overlap = true; + } + + if (!hack_overlap) + ports.push_back(portSnoopList[i].portId); // DPRINTF(Bus, " found snoop addr %#llx on device%d\n", addr, // portSnoopList[i].portId); } @@ -317,17 +337,19 @@ Bus::findSnoopPorts(Addr addr, int id) } Tick -Bus::atomicSnoop(PacketPtr pkt) +Bus::atomicSnoop(PacketPtr pkt, Port *responder) { std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); Tick response_time = 0; while (!ports.empty()) { - Tick response = interfaces[ports.back()]->sendAtomic(pkt); - if (response) { - assert(!response_time); //Multiple responders - response_time = response; + if (interfaces[ports.back()] != responder) { + Tick response = interfaces[ports.back()]->sendAtomic(pkt); + if (response) { + assert(!response_time); //Multiple responders + response_time = response; + } } ports.pop_back(); } @@ -335,26 +357,31 @@ Bus::atomicSnoop(PacketPtr pkt) } void -Bus::functionalSnoop(PacketPtr pkt) +Bus::functionalSnoop(PacketPtr pkt, Port *responder) { std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); + //The packet may be changed by another bus on snoops, restore the id after each + int id = pkt->getSrc(); while (!ports.empty() && pkt->result != Packet::Success) { - interfaces[ports.back()]->sendFunctional(pkt); + if (interfaces[ports.back()] != responder) + interfaces[ports.back()]->sendFunctional(pkt); ports.pop_back(); + pkt->setSrc(id); } } bool -Bus::timingSnoop(PacketPtr pkt) +Bus::timingSnoop(PacketPtr pkt, Port* responder) { std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); bool success = true; while (!ports.empty() && success) { - success = interfaces[ports.back()]->sendTiming(pkt); + if (interfaces[ports.back()] != responder) //Don't call if responder also, once will do + success = interfaces[ports.back()]->sendTiming(pkt); ports.pop_back(); } @@ -370,15 +397,21 @@ Bus::recvAtomic(PacketPtr pkt) DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); + pkt->flags |= SNOOP_COMMIT; // Assume one bus cycle in order to get through. This may have // some clock skew issues yet again... pkt->finishTime = curTick + clock; - Tick snoopTime = atomicSnoop(pkt); + + Port *port = findPort(pkt->getAddr(), pkt->getSrc()); + Tick snoopTime = atomicSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); + if (snoopTime) return snoopTime; //Snoop satisfies it + else if (port) + return port->sendAtomic(pkt); else - return findPort(pkt->getAddr(), pkt->getSrc())->sendAtomic(pkt); + return 0; } /** Function called by the port when the bus is receiving a Functional @@ -389,11 +422,15 @@ Bus::recvFunctional(PacketPtr pkt) DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); - functionalSnoop(pkt); + pkt->flags |= SNOOP_COMMIT; + + Port* port = findPort(pkt->getAddr(), pkt->getSrc()); + functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); // If the snooping found what we were looking for, we're done. - if (pkt->result != Packet::Success) - findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt); + if (pkt->result != Packet::Success && port) { + port->sendFunctional(pkt); + } } /** Function called by the port when the bus is receiving a status change.*/ @@ -451,6 +488,7 @@ Bus::recvStatusChange(Port::Status status, int id) dm.portId = id; dm.range = *iter; + //@todo, make sure we don't overlap ranges DPRINTF(BusAddrRanges, "Adding snoop range %#llx - %#llx for id %d\n", dm.range.start, dm.range.end, id); portSnoopList.push_back(dm); @@ -493,7 +531,7 @@ Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id) for (dflt_iter = defaultRange.begin(); dflt_iter != defaultRange.end(); dflt_iter++) { resp.push_back(*dflt_iter); - DPRINTF(BusAddrRanges, " -- %#llx : %#llx\n",dflt_iter->start, + DPRINTF(BusAddrRanges, " -- Dflt: %#llx : %#llx\n",dflt_iter->start, dflt_iter->end); } for (portIter = portList.begin(); portIter != portList.end(); portIter++) { @@ -519,6 +557,18 @@ Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id) portIter->range.start, portIter->range.end); } } + + for (portIter = portSnoopList.begin(); + portIter != portSnoopList.end(); portIter++) + { + if (portIter->portId != id) { + snoop.push_back(portIter->range); + DPRINTF(BusAddrRanges, " -- Snoop: %#llx : %#llx\n", + portIter->range.start, portIter->range.end); + //@todo We need to properly insert snoop ranges + //not overlapping the ranges (multiple) + } + } } unsigned int diff --git a/src/mem/bus.hh b/src/mem/bus.hh index ff1d2545d..c472b6143 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -109,16 +109,16 @@ class Bus : public MemObject std::vector<int> findSnoopPorts(Addr addr, int id); /** Snoop all relevant ports atomicly. */ - Tick atomicSnoop(PacketPtr pkt); + Tick atomicSnoop(PacketPtr pkt, Port* responder); /** Snoop all relevant ports functionally. */ - void functionalSnoop(PacketPtr pkt); + void functionalSnoop(PacketPtr pkt, Port *responder); /** Call snoop on caches, be sure to set SNOOP_COMMIT bit if you want * the snoop to happen * @return True if succeds. */ - bool timingSnoop(PacketPtr pkt); + bool timingSnoop(PacketPtr pkt, Port *responder); /** Process address range request. * @param resp addresses that we can respond to diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index c16cb6945..3af61375d 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -160,11 +160,14 @@ BaseCache::CachePort::recvRetry() PacketPtr pkt; assert(waitingOnRetry); if (!drainList.empty()) { - DPRINTF(CachePort, "%s attempting to send a retry for response\n", name()); + DPRINTF(CachePort, "%s attempting to send a retry for response (%i waiting)\n" + , name(), drainList.size()); //We have some responses to drain first - if (sendTiming(drainList.front())) { - DPRINTF(CachePort, "%s sucessful in sending a retry for response\n", name()); - drainList.pop_front(); + pkt = drainList.front(); + drainList.pop_front(); + if (sendTiming(pkt)) { + DPRINTF(CachePort, "%s sucessful in sending a retry for" + "response (%i still waiting)\n", name(), drainList.size()); if (!drainList.empty() || !isCpuSide && cache->doMasterRequest() || isCpuSide && cache->doSlaveRequest()) { @@ -175,6 +178,9 @@ BaseCache::CachePort::recvRetry() } waitingOnRetry = false; } + else { + drainList.push_front(pkt); + } // Check if we're done draining once this list is empty if (drainList.empty()) cache->checkDrain(); diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 584c2d5df..ef4955432 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -144,8 +144,6 @@ class BaseCache : public MemObject protected: CachePort *memSidePort; - bool snoopRangesSent; - public: virtual Port *getPort(const std::string &if_name, int idx = -1); @@ -171,10 +169,6 @@ class BaseCache : public MemObject if (status == Port::RangeChange){ if (!isCpuSide) { cpuSidePort->sendStatusChange(Port::RangeChange); - if (!snoopRangesSent) { - snoopRangesSent = true; - memSidePort->sendStatusChange(Port::RangeChange); - } } else { memSidePort->sendStatusChange(Port::RangeChange); @@ -358,7 +352,6 @@ class BaseCache : public MemObject //Start ports at null if more than one is created we should panic cpuSidePort = NULL; memSidePort = NULL; - snoopRangesSent = false; } ~BaseCache() diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 2bc51bf12..19251941f 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -301,7 +301,7 @@ class Packet /** Destructor. */ ~Packet() - { deleteData(); } + { if (staticData || dynamicData) deleteData(); } /** Reinitialize packet address and size from the associated * Request object, and reset other fields that may have been diff --git a/src/python/m5/objects/BaseCPU.py b/src/python/m5/objects/BaseCPU.py index 2f702a4bf..8037c90af 100644 --- a/src/python/m5/objects/BaseCPU.py +++ b/src/python/m5/objects/BaseCPU.py @@ -47,6 +47,7 @@ class BaseCPU(SimObject): "defer registration with system (for sampling)") clock = Param.Clock(Parent.clock, "clock speed") + phase = Param.Latency("0ns", "clock phase") _mem_ports = [] |