From 023fccff0e7f6e2be144c56567b58845cc7383f5 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 13 Nov 2006 16:09:47 -0500 Subject: Update splash2 config files configs/splash2/run.py: Fix MaxTick for splash configs configs/splash2/cluster.py: Add a config that allows clusters of CPU's to be attached to a single L1 --HG-- extra : convert_revision : 1bb0a0c5f4889316940a9858be90ae2eaa849f1a --- configs/splash2/cluster.py | 303 +++++++++++++++++++++++++++++++++++++++++++++ configs/splash2/run.py | 2 +- 2 files changed, 304 insertions(+), 1 deletion(-) create mode 100644 configs/splash2/cluster.py diff --git a/configs/splash2/cluster.py b/configs/splash2/cluster.py new file mode 100644 index 000000000..799b85e6c --- /dev/null +++ b/configs/splash2/cluster.py @@ -0,0 +1,303 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Ron Dreslinski + +# Simple test script +# +# "m5 test.py" + +import m5 +from m5.objects import * +import os, optparse, sys +m5.AddToPath('../common') + +# -------------------- +# Define Command Line Options +# ==================== + +parser = optparse.OptionParser() + +parser.add_option("-d", "--detailed", action="store_true") +parser.add_option("-t", "--timing", action="store_true") +parser.add_option("-m", "--maxtick", type="int") +parser.add_option("-c", "--numclusters", + help="Number of clusters", type="int") +parser.add_option("-n", "--numcpus", + help="Number of cpus in total", type="int") +parser.add_option("-f", "--frequency", + default = "1GHz", + help="Frequency of each CPU") +parser.add_option("-p", "--protocol", + default="moesi", + help="The coherence protocol to use for the L1'a (i.e. 
MOESI, MOSI)") +parser.add_option("--l1size", + default = "32kB") +parser.add_option("--l1latency", + default = 1) +parser.add_option("--l2size", + default = "256kB") +parser.add_option("--l2latency", + default = 10) +parser.add_option("--rootdir", + help="ROot directory of Splash2", + default="/dist/splash2/codes/") +parser.add_option("-b", "--benchmark", + help="Splash 2 benchmark to run") + +(options, args) = parser.parse_args() + +if args: + print "Error: script doesn't take any positional arguments" + sys.exit(1) + +# -------------------- +# Define Splash2 Benchmarks +# ==================== +class Cholesky(LiveProcess): + executable = options.rootdir + '/kernels/cholesky/CHOLESKY' + cmd = 'CHOLESKY -p' + str(options.numcpus) + ' '\ + + options.rootdir + '/kernels/cholesky/inputs/tk23.O' + +class FFT(LiveProcess): + executable = options.rootdir + 'kernels/fft/FFT' + cmd = 'FFT -p' + str(options.numcpus) + ' -m18' + +class LU_contig(LiveProcess): + executable = options.rootdir + 'kernels/lu/contiguous_blocks/LU' + cmd = 'LU -p' + str(options.numcpus) + +class LU_noncontig(LiveProcess): + executable = options.rootdir + 'kernels/lu/non_contiguous_blocks/LU' + cmd = 'LU -p' + str(options.numcpus) + +class Radix(LiveProcess): + executable = options.rootdir + 'kernels/radix/RADIX' + cmd = 'RADIX -n524288 -p' + str(options.numcpus) + +class Barnes(LiveProcess): + executable = options.rootdir + 'apps/barnes/BARNES' + cmd = 'BARNES' + input = options.rootdir + 'apps/barnes/input.p' + str(options.numcpus) + +class FMM(LiveProcess): + executable = options.rootdir + 'apps/fmm/FMM' + cmd = 'FMM' + input = options.rootdir + 'apps/fmm/inputs/input.2048.p' + str(options.numcpus) + +class Ocean_contig(LiveProcess): + executable = options.rootdir + 'apps/ocean/contiguous_partitions/OCEAN' + cmd = 'OCEAN -p' + str(options.numcpus) + +class Ocean_noncontig(LiveProcess): + executable = options.rootdir + 'apps/ocean/non_contiguous_partitions/OCEAN' + cmd = 'OCEAN -p' + 
str(options.numcpus) + +class Raytrace(LiveProcess): + executable = options.rootdir + 'apps/raytrace/RAYTRACE' + cmd = 'RAYTRACE -p' + str(options.numcpus) + ' ' \ + + options.rootdir + 'apps/raytrace/inputs/teapot.env' + +class Water_nsquared(LiveProcess): + executable = options.rootdir + 'apps/water-nsquared/WATER-NSQUARED' + cmd = 'WATER-NSQUARED' + input = options.rootdir + 'apps/water-nsquared/input.p' + str(options.numcpus) + +class Water_spatial(LiveProcess): + executable = options.rootdir + 'apps/water-spatial/WATER-SPATIAL' + cmd = 'WATER-SPATIAL' + input = options.rootdir + 'apps/water-spatial/input.p' + str(options.numcpus) + + +# -------------------- +# Base L1 Cache Definition +# ==================== + +class L1(BaseCache): + latency = options.l1latency + block_size = 64 + mshrs = 12 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol=options.protocol) + +# ---------------------- +# Base L2 Cache Definition +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = options.l2latency + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +# ---------------------- +# Define the clusters with their cpus +# ---------------------- +class Cluster: + pass + +cpusPerCluster = options.numcpus/options.numclusters + +busFrequency = Frequency(options.frequency) +busFrequency *= cpusPerCluster + +all_cpus = [] +all_l1s = [] +all_l1buses = [] +if options.timing: + clusters = [ Cluster() for i in xrange(options.numclusters)] + for j in xrange(options.numclusters): + clusters[j].id = j + for cluster in clusters: + cluster.clusterbus = Bus(clock=busFrequency) + all_l1buses += [cluster.clusterbus] + cluster.cpus = [TimingSimpleCPU(cpu_id = i + cluster.id, + clock=options.frequency) + for i in xrange(cpusPerCluster)] + all_cpus += cluster.cpus + cluster.l1 = L1(size=options.l1size, assoc = 4) + all_l1s += [cluster.l1] +elif options.detailed: + clusters = [ Cluster() for i in xrange(options.numclusters)] + for j in 
xrange(options.numclusters): + clusters[j].id = j + for cluster in clusters: + cluster.clusterbus = Bus(clock=busFrequency) + all_l1buses += [cluster.clusterbus] + cluster.cpus = [DerivO3CPU(cpu_id = i + cluster.id, + clock=options.frequency) + for i in xrange(cpusPerCluster)] + all_cpus += cluster.cpus + cluster.l1 = L1(size=options.l1size, assoc = 4) + all_l1s += [cluster.l1] +else: + clusters = [ Cluster() for i in xrange(options.numclusters)] + for j in xrange(options.numclusters): + clusters[j].id = j + for cluster in clusters: + cluster.clusterbus = Bus(clock=busFrequency) + all_l1buses += [cluster.clusterbus] + cluster.cpus = [AtomicSimpleCPU(cpu_id = i + cluster.id, + clock=options.frequency) + for i in xrange(cpusPerCluster)] + all_cpus += cluster.cpus + cluster.l1 = L1(size=options.l1size, assoc = 4) + all_l1s += [cluster.l1] + +# ---------------------- +# Create a system, and add system wide objects +# ---------------------- +system = System(cpu = all_cpus, l1_ = all_l1s, l1bus_ = all_l1buses, physmem = PhysicalMemory(), + membus = Bus(clock = busFrequency)) + +system.toL2bus = Bus(clock = busFrequency) +system.l2 = L2(size = options.l2size, assoc = 8) + +# ---------------------- +# Connect the L2 cache and memory together +# ---------------------- + +system.physmem.port = system.membus.port +system.l2.cpu_side = system.toL2bus.port +system.l2.mem_side = system.membus.port + +# ---------------------- +# Connect the L2 cache and clusters together +# ---------------------- +for cluster in clusters: + cluster.l1.cpu_side = cluster.clusterbus.port + cluster.l1.mem_side = system.toL2bus.port + for cpu in cluster.cpus: + cpu.icache_port = cluster.clusterbus.port + cpu.dcache_port = cluster.clusterbus.port + cpu.mem = cluster.l1 + +# ---------------------- +# Define the root +# ---------------------- + +root = Root(system = system) + +# -------------------- +# Pick the correct Splash2 Benchmarks +# ==================== +if options.benchmark == 'Cholesky': + 
root.workload = Cholesky() +elif options.benchmark == 'FFT': + root.workload = FFT() +elif options.benchmark == 'LUContig': + root.workload = LU_contig() +elif options.benchmark == 'LUNoncontig': + root.workload = LU_noncontig() +elif options.benchmark == 'Radix': + root.workload = Radix() +elif options.benchmark == 'Barnes': + root.workload = Barnes() +elif options.benchmark == 'FMM': + root.workload = FMM() +elif options.benchmark == 'OceanContig': + root.workload = Ocean_contig() +elif options.benchmark == 'OceanNoncontig': + root.workload = Ocean_noncontig() +elif options.benchmark == 'Raytrace': + root.workload = Raytrace() +elif options.benchmark == 'WaterNSquared': + root.workload = Water_nsquared() +elif options.benchmark == 'WaterSpatial': + root.workload = Water_spatial() +else: + panic("The --benchmark environment variable was set to something" \ + +" improper.\nUse Cholesky, FFT, LUContig, LUNoncontig, Radix" \ + +", Barnes, FMM, OceanContig,\nOceanNoncontig, Raytrace," \ + +" WaterNSquared, or WaterSpatial\n") + +# -------------------- +# Assign the workload to the cpus +# ==================== + +for cluster in clusters: + for cpu in cluster.cpus: + cpu.workload = root.workload + +# ---------------------- +# Run the simulation +# ---------------------- + +if options.timing or options.detailed: + root.system.mem_mode = 'timing' + +# instantiate configuration +m5.instantiate(root) + +# simulate until program terminates +if options.maxtick: + exit_event = m5.simulate(options.maxtick) +else: + exit_event = m5.simulate(m5.MaxTick) + +print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause() + diff --git a/configs/splash2/run.py b/configs/splash2/run.py index 7d56cb830..b162e0cc7 100644 --- a/configs/splash2/run.py +++ b/configs/splash2/run.py @@ -262,7 +262,7 @@ m5.instantiate(root) if options.maxtick: exit_event = m5.simulate(options.maxtick) else: - exit_event = m5.simulate(1000000000000) + exit_event = m5.simulate(m5.MaxTick) print 'Exiting 
@ tick', m5.curTick(), 'because', exit_event.getCause() -- cgit v1.2.3 From a962fc4f561126bea65f3dd52a7194c5527d255a Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 13 Nov 2006 18:51:16 -0500 Subject: Make CPU models signal to update the snoop ranges --HG-- extra : convert_revision : 717b62510f28a69af99453309fbbb458359eeb2a --- src/cpu/memtest/memtest.cc | 10 +++++++++- src/cpu/memtest/memtest.hh | 4 ++++ src/cpu/o3/fetch.hh | 2 ++ src/cpu/o3/fetch_impl.hh | 9 ++++++++- src/cpu/o3/lsq.hh | 2 ++ src/cpu/o3/lsq_impl.hh | 10 ++++++++-- src/cpu/simple/atomic.cc | 10 +++++++++- src/cpu/simple/atomic.hh | 2 ++ src/cpu/simple/timing.cc | 11 ++++++++++- src/cpu/simple/timing.hh | 4 ++-- 10 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 91e073cf0..180f41541 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -81,8 +81,13 @@ MemTest::CpuPort::recvFunctional(PacketPtr pkt) void MemTest::CpuPort::recvStatusChange(Status status) { - if (status == RangeChange) + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } return; + } panic("MemTest doesn't expect recvStatusChange callback!"); } @@ -145,6 +150,9 @@ MemTest::MemTest(const string &name, // thread = new SimpleThread(NULL, 0, NULL, 0, mainMem); curTick = 0; + cachePort.snoopRangeSent = false; + funcPort.snoopRangeSent = true; + // Needs to be masked off once we know the block size. 
traceBlockAddr = _traceAddr; baseAddr1 = 0x100000; diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index 2694efd39..7bf34d827 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -100,6 +100,8 @@ class MemTest : public MemObject : Port(_name, _memtest), memtest(_memtest) { } + bool snoopRangeSent; + protected: virtual bool recvTiming(PacketPtr pkt); @@ -120,6 +122,8 @@ class MemTest : public MemObject CpuPort cachePort; CpuPort funcPort; + bool snoopRangeSent; + class MemTestSenderState : public Packet::SenderState { public: diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index cc9a8abf5..04016347a 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -83,6 +83,8 @@ class DefaultFetch : Port(_fetch->name() + "-iport"), fetch(_fetch) { } + bool snoopRangeSent; + protected: /** Atomic version of receive. Panics. */ virtual Tick recvAtomic(PacketPtr pkt); diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 25faa407e..63d22b293 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -70,8 +70,13 @@ template void DefaultFetch::IcachePort::recvStatusChange(Status status) { - if (status == RangeChange) + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } return; + } panic("DefaultFetch doesn't expect recvStatusChange callback!"); } @@ -287,6 +292,8 @@ DefaultFetch::setCPU(O3CPU *cpu_ptr) // Name is finally available, so create the port. icachePort = new IcachePort(this); + icachePort->snoopRangeSent = false; + #if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(icachePort); diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 6b12d75b4..7559a36d5 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -298,6 +298,8 @@ class LSQ { : lsq(_lsq) { } + bool snoopRangeSent; + protected: /** Atomic version of receive. Panics. 
*/ virtual Tick recvAtomic(PacketPtr pkt); diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 5e7945c1c..6758e51c8 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -53,9 +53,13 @@ template void LSQ::DcachePort::recvStatusChange(Status status) { - if (status == RangeChange) + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } return; - + } panic("O3CPU doesn't expect recvStatusChange callback!"); } @@ -97,6 +101,8 @@ LSQ::LSQ(Params *params) { DPRINTF(LSQ, "Creating LSQ object.\n"); + dcachePort.snoopRangeSent = false; + //**********************************************/ //************ Handle SMT Parameters ***********/ //**********************************************/ diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 58dc1fe5f..9a1f831cd 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -107,8 +107,13 @@ AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt) void AtomicSimpleCPU::CpuPort::recvStatusChange(Status status) { - if (status == RangeChange) + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } return; + } panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!"); } @@ -127,6 +132,9 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p) { _status = Idle; + icachePort.snoopRangeSent = false; + dcachePort.snoopRangeSent = false; + ifetch_req = new Request(); ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 166a18127..0df6fe079 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -90,6 +90,8 @@ class AtomicSimpleCPU : public BaseSimpleCPU : Port(_name, _cpu), cpu(_cpu) { } + bool snoopRangeSent; + protected: virtual bool recvTiming(PacketPtr pkt); diff --git 
a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index db2c940c0..1ea2df894 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -82,8 +82,13 @@ TimingSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt) void TimingSimpleCPU::CpuPort::recvStatusChange(Status status) { - if (status == RangeChange) + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } return; + } panic("TimingSimpleCPU doesn't expect recvStatusChange callback!"); } @@ -101,6 +106,10 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) cpu_id(p->cpu_id) { _status = Idle; + + icachePort.snoopRangeSent = false; + dcachePort.snoopRangeSent = false; + ifetch_pkt = dcache_pkt = NULL; drainEvent = NULL; fetchEvent = NULL; diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 408fa315e..fe5d03666 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -82,6 +82,8 @@ class TimingSimpleCPU : public BaseSimpleCPU : Port(_name, _cpu), cpu(_cpu), lat(_lat) { } + bool snoopRangeSent; + protected: virtual Tick recvAtomic(PacketPtr pkt); @@ -166,8 +168,6 @@ class TimingSimpleCPU : public BaseSimpleCPU PacketPtr ifetch_pkt; PacketPtr dcache_pkt; - - int cpu_id; Tick previousTick; -- cgit v1.2.3 From 356a4f9f591c0b90eaf1b4dedb8d54c19153642b Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 13 Nov 2006 19:00:50 -0500 Subject: Since cpus now send out snoop ranges, remove it from the cache. 
--HG-- extra : convert_revision : 82882eb131aa66eba9f281b64db21d5cbfefb1b9 --- src/mem/cache/base_cache.hh | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 584c2d5df..ef4955432 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -144,8 +144,6 @@ class BaseCache : public MemObject protected: CachePort *memSidePort; - bool snoopRangesSent; - public: virtual Port *getPort(const std::string &if_name, int idx = -1); @@ -171,10 +169,6 @@ class BaseCache : public MemObject if (status == Port::RangeChange){ if (!isCpuSide) { cpuSidePort->sendStatusChange(Port::RangeChange); - if (!snoopRangesSent) { - snoopRangesSent = true; - memSidePort->sendStatusChange(Port::RangeChange); - } } else { memSidePort->sendStatusChange(Port::RangeChange); @@ -358,7 +352,6 @@ class BaseCache : public MemObject //Start ports at null if more than one is created we should panic cpuSidePort = NULL; memSidePort = NULL; - snoopRangesSent = false; } ~BaseCache() -- cgit v1.2.3 From dfc82bdcfc6526f338969d224ab52fa6700cb295 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 13 Nov 2006 19:12:45 -0500 Subject: Changes needed for a bus from CPU->L1 src/cpu/simple/atomic.cc: Make the atomic cpu return 0 on snoops. 
--HG-- extra : convert_revision : aad96ad36e0c764c7cfef8b0c8e97877574f5845 --- src/cpu/simple/atomic.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 9a1f831cd..325260609 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -94,7 +94,7 @@ Tick AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt) { //Snooping a coherence request, just return - return curTick; + return 0; } void -- cgit v1.2.3 From 6c5c51338d85f54953d11fefd956baceed37c010 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 13 Nov 2006 19:56:34 -0500 Subject: Fix problems with snoop ranges not working properly on functional accesses src/mem/bus.cc: Actually return the snoop list when asked for it. Don't get stuck in infinite functional loops --HG-- extra : convert_revision : 8e6dafbd10b30d48d28b6b5d4b464e8e8f6a3ddc --- src/mem/bus.cc | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 8ea67a0e4..8b77011bb 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -290,7 +290,10 @@ Bus::findPort(Addr addr, int id) // we shouldn't be sending this back to where it came from - assert(dest_id != id); + // only on a functional access and then we should terminate + // the cyclical call. + if (dest_id == id) + return 0; return interfaces[dest_id]; } @@ -392,8 +395,11 @@ Bus::recvFunctional(PacketPtr pkt) functionalSnoop(pkt); // If the snooping found what we were looking for, we're done. 
- if (pkt->result != Packet::Success) - findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt); + if (pkt->result != Packet::Success) { + Port* port = findPort(pkt->getAddr(), pkt->getSrc()); + if (port) + port->sendFunctional(pkt); + } } /** Function called by the port when the bus is receiving a status change.*/ @@ -493,7 +499,7 @@ Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id) for (dflt_iter = defaultRange.begin(); dflt_iter != defaultRange.end(); dflt_iter++) { resp.push_back(*dflt_iter); - DPRINTF(BusAddrRanges, " -- %#llx : %#llx\n",dflt_iter->start, + DPRINTF(BusAddrRanges, " -- Dflt: %#llx : %#llx\n",dflt_iter->start, dflt_iter->end); } for (portIter = portList.begin(); portIter != portList.end(); portIter++) { @@ -519,6 +525,16 @@ Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id) portIter->range.start, portIter->range.end); } } + + for (portIter = portSnoopList.begin(); + portIter != portSnoopList.end(); portIter++) + { + if (portIter->portId != id) { + snoop.push_back(portIter->range); + DPRINTF(BusAddrRanges, " -- Snoop: %#llx : %#llx\n", + portIter->range.start, portIter->range.end); + } + } } unsigned int -- cgit v1.2.3 From 9b6fd56dd5700e29904ab03f445eafe4fcba7853 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 13 Nov 2006 21:33:01 -0500 Subject: Fix some errors related to snooping and functional access in the bus src/mem/bus.cc: Only call snoop once per port, need to fix it so snoop ranges that overlap aren't added to list Functional accesses that call snoop and it goes to a higher bus may change the src, reset it after each snoop. 
--HG-- extra : convert_revision : 7276059c798a85cb9d138ccc5531298ecd055c13 --- src/mem/bus.cc | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 8b77011bb..b97a7ddb9 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -310,7 +310,18 @@ Bus::findSnoopPorts(Addr addr, int id) if (portSnoopList[i].range == addr && portSnoopList[i].portId != id) { //Careful to not overlap ranges //or snoop will be called more than once on the port - ports.push_back(portSnoopList[i].portId); + + //@todo Fix this hack because ranges are overlapping + //need to make sure we dont't create overlapping ranges + bool hack_overlap = false; + int size = ports.size(); + for (int j=0; j < size; j++) { + if (ports[j] == portSnoopList[i].portId) + hack_overlap = true; + } + + if (!hack_overlap) + ports.push_back(portSnoopList[i].portId); // DPRINTF(Bus, " found snoop addr %#llx on device%d\n", addr, // portSnoopList[i].portId); } @@ -342,10 +353,13 @@ Bus::functionalSnoop(PacketPtr pkt) { std::vector ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); + //The packet may be changed by another bus on snoops, restore the id after each + int id = pkt->getSrc(); while (!ports.empty() && pkt->result != Packet::Success) { interfaces[ports.back()]->sendFunctional(pkt); ports.pop_back(); + pkt->setSrc(id); } } @@ -457,6 +471,7 @@ Bus::recvStatusChange(Port::Status status, int id) dm.portId = id; dm.range = *iter; + //@todo, make sure we don't overlap ranges DPRINTF(BusAddrRanges, "Adding snoop range %#llx - %#llx for id %d\n", dm.range.start, dm.range.end, id); portSnoopList.push_back(dm); @@ -533,6 +548,8 @@ Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id) snoop.push_back(portIter->range); DPRINTF(BusAddrRanges, " -- Snoop: %#llx : %#llx\n", portIter->range.start, portIter->range.end); + //@todo We need to properly insert snoop ranges + //not overlapping the ranges (multiple) } } } -- cgit v1.2.3 From 
69e183941fcc6ffec9f5766e4144f6b772e9a11c Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 13 Nov 2006 21:34:25 -0500 Subject: If we didn't satisfy all targets, reset the packet we are requesting with. --HG-- extra : convert_revision : 736372131b046eccf3520292fb3c086dc568d918 --- src/mem/cache/miss/miss_queue.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index fe467a8ea..3c4586272 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -612,6 +612,8 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time) if (mshr->hasTargets()) { // Didn't satisfy all the targets, need to resend Packet::Command cmd = mshr->getTarget()->cmd; + mshr->pkt->setDest(Packet::Broadcast); + mshr->pkt->result = Packet::Unknown; mq.markPending(mshr, cmd); mshr->order = order++; cache->setMasterRequest(Request_MSHR, time); -- cgit v1.2.3 From 903a61871438fc872a4762e4d782264cbbd02154 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 13 Nov 2006 22:37:22 -0500 Subject: Fix a bug to handle the fact that a CPU can send Functional accesses while a sendTiming has not returned in the call stack. src/mem/cache/base_cache.cc: Sometimes a functional access comes while waiting on an outstanding packet being sent. This could be because Timing CPU does some post processing on the recvTiming which sends a functional access. Either the CPU should leave the pkt/req around (so they can be referenced in the mem system), or the mem system should remove them from outstanding lists and reinsert them if they fail in the sendTiming. I did the latter; eventually we should consider doing the former if that is the correct behavior. 
--HG-- extra : convert_revision : be41e0d2632369dca9d7c15e96e5576d7583fe6a --- src/mem/cache/base_cache.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index c16cb6945..3af61375d 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -160,11 +160,14 @@ BaseCache::CachePort::recvRetry() PacketPtr pkt; assert(waitingOnRetry); if (!drainList.empty()) { - DPRINTF(CachePort, "%s attempting to send a retry for response\n", name()); + DPRINTF(CachePort, "%s attempting to send a retry for response (%i waiting)\n" + , name(), drainList.size()); //We have some responses to drain first - if (sendTiming(drainList.front())) { - DPRINTF(CachePort, "%s sucessful in sending a retry for response\n", name()); - drainList.pop_front(); + pkt = drainList.front(); + drainList.pop_front(); + if (sendTiming(pkt)) { + DPRINTF(CachePort, "%s sucessful in sending a retry for" + "response (%i still waiting)\n", name(), drainList.size()); if (!drainList.empty() || !isCpuSide && cache->doMasterRequest() || isCpuSide && cache->doSlaveRequest()) { @@ -175,6 +178,9 @@ BaseCache::CachePort::recvRetry() } waitingOnRetry = false; } + else { + drainList.push_front(pkt); + } // Check if we're done draining once this list is empty if (drainList.empty()) cache->checkDrain(); -- cgit v1.2.3 From 7babf6b3a89dffdef108c0d68057eabc491dcc50 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Tue, 14 Nov 2006 01:10:36 -0500 Subject: Make cpu's capable of having a phase shift --HG-- extra : convert_revision : 7f082ba5c1cd2445aec731950c31a877aac23a75 --- src/cpu/base.cc | 12 ++++++++---- src/cpu/base.hh | 3 +++ src/cpu/o3/alpha/cpu_builder.cc | 2 ++ src/cpu/o3/mips/cpu_builder.cc | 2 ++ src/cpu/simple/atomic.cc | 3 +++ src/cpu/simple/timing.cc | 7 +++++-- 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 4c243a2e9..7cbbb0b96 100644 --- 
a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -97,11 +97,13 @@ CPUProgressEvent::description() #if FULL_SYSTEM BaseCPU::BaseCPU(Params *p) : MemObject(p->name), clock(p->clock), checkInterrupts(true), - params(p), number_of_threads(p->numberOfThreads), system(p->system) + params(p), number_of_threads(p->numberOfThreads), system(p->system), + phase(p->phase) #else BaseCPU::BaseCPU(Params *p) : MemObject(p->name), clock(p->clock), params(p), - number_of_threads(p->numberOfThreads), system(p->system) + number_of_threads(p->numberOfThreads), system(p->system), + phase(p->phase) #endif { // currentTick = curTick; @@ -257,8 +259,9 @@ BaseCPU::regStats() Tick BaseCPU::nextCycle() { - Tick next_tick = curTick + clock - 1; + Tick next_tick = curTick - phase + clock - 1; next_tick -= (next_tick % clock); + next_tick += phase; return next_tick; } @@ -266,11 +269,12 @@ Tick BaseCPU::nextCycle(Tick begin_tick) { Tick next_tick = begin_tick; + next_tick -= (next_tick % clock); + next_tick += phase; while (next_tick < curTick) next_tick += clock; - next_tick -= (next_tick % clock); assert(next_tick >= curTick); return next_tick; } diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 788f77e3a..1d9b6a93b 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -153,6 +153,7 @@ class BaseCPU : public MemObject Tick functionTraceStart; System *system; int cpu_id; + Tick phase; #if FULL_SYSTEM Tick profile; @@ -209,6 +210,8 @@ class BaseCPU : public MemObject System *system; + Tick phase; + #if FULL_SYSTEM /** * Serialize this object to the given output stream. 
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 09ccc7f65..5a375a4b8 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -48,6 +48,7 @@ class DerivO3CPU : public AlphaO3CPU BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param clock; + Param phase; Param numThreads; Param activity; @@ -158,6 +159,7 @@ END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), + INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(numThreads, "number of HW thread contexts"), INIT_PARAM_DFLT(activity, "Initial activity count", 0), diff --git a/src/cpu/o3/mips/cpu_builder.cc b/src/cpu/o3/mips/cpu_builder.cc index ee9f2b48d..66741aee9 100644 --- a/src/cpu/o3/mips/cpu_builder.cc +++ b/src/cpu/o3/mips/cpu_builder.cc @@ -49,6 +49,7 @@ class DerivO3CPU : public MipsO3CPU BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param clock; +Param phase; Param numThreads; Param activity; @@ -146,6 +147,7 @@ END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), + INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(numThreads, "number of HW thread contexts"), INIT_PARAM_DFLT(activity, "Initial activity count", 0), diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 325260609..133b5500b 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -520,6 +520,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) #endif // FULL_SYSTEM Param clock; + Param phase; Param defer_registration; Param width; @@ -555,6 +556,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) #endif // FULL_SYSTEM INIT_PARAM(clock, "clock speed"), + INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(width, "cpu width"), INIT_PARAM(function_trace, "Enable function trace"), @@ -575,6 +577,7 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU) 
params->max_loads_all_threads = max_loads_all_threads; params->progress_interval = progress_interval; params->deferRegistration = defer_registration; + params->phase = phase; params->clock = clock; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 1ea2df894..d9839bede 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -169,7 +169,7 @@ TimingSimpleCPU::resume() fetchEvent = new EventWrapper(this, false); - fetchEvent->schedule(curTick); + fetchEvent->schedule(nextCycle()); } changeState(SimObject::Running); @@ -241,7 +241,7 @@ TimingSimpleCPU::activateContext(int thread_num, int delay) // kick things off by initiating the fetch of the next instruction fetchEvent = new EventWrapper(this, false); - fetchEvent->schedule(curTick + cycles(delay)); + fetchEvent->schedule(nextCycle(curTick + cycles(delay))); } @@ -683,6 +683,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) #endif // FULL_SYSTEM Param clock; + Param phase; Param defer_registration; Param width; @@ -718,6 +719,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) #endif // FULL_SYSTEM INIT_PARAM(clock, "clock speed"), + INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(width, "cpu width"), INIT_PARAM(function_trace, "Enable function trace"), @@ -739,6 +741,7 @@ CREATE_SIM_OBJECT(TimingSimpleCPU) params->progress_interval = progress_interval; params->deferRegistration = defer_registration; params->clock = clock; + params->phase = phase; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; params->system = system; -- cgit v1.2.3 From 4135dd48ed0e876e11a935240d5436a421db12c6 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Tue, 14 Nov 2006 01:12:52 -0500 Subject: Update bus bridges now that snoop ranges are passed properly src/mem/bridge.cc: Update bridges, now 
that snoop addresses are properly forwarded. Bus bridge should only handle snoops on the second phase (SNOOP_COMMIT) src/mem/bus.cc: src/mem/bus.hh: Make sure if a busBridge has access to both things that snoop and things that respond it only takes the request once --HG-- extra : convert_revision : 26cc9ee4429be45d4476fa435e0e9a54843c2509 --- src/mem/bridge.cc | 12 +++++++++--- src/mem/bus.cc | 16 +++++++++++----- src/mem/bus.hh | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 38dcfd2e8..b787f79ca 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -91,10 +91,16 @@ Bridge::init() bool Bridge::BridgePort::recvTiming(PacketPtr pkt) { - DPRINTF(BusBridge, "recvTiming: src %d dest %d addr 0x%x\n", - pkt->getSrc(), pkt->getDest(), pkt->getAddr()); + if (pkt->flags & SNOOP_COMMIT) { + DPRINTF(BusBridge, "recvTiming: src %d dest %d addr 0x%x\n", + pkt->getSrc(), pkt->getDest(), pkt->getAddr()); - return otherPort->queueForSendTiming(pkt); + return otherPort->queueForSendTiming(pkt); + } + else { + // Else it's just a snoop, properly return if we are blocking + return !queueFull(); + } } diff --git a/src/mem/bus.cc b/src/mem/bus.cc index b97a7ddb9..a9f95fdc7 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -160,11 +160,12 @@ Bus::recvTiming(PacketPtr pkt) short dest = pkt->getDest(); if (dest == Packet::Broadcast) { - if (timingSnoop(pkt)) { + port = findPort(pkt->getAddr(), pkt->getSrc()); + if (timingSnoop(pkt, port)) { bool success; pkt->flags |= SNOOP_COMMIT; - success = timingSnoop(pkt); + success = timingSnoop(pkt, port); assert(success); if (pkt->flags & SATISFIED) { @@ -177,7 +178,6 @@ Bus::recvTiming(PacketPtr pkt) occupyBus(pkt); return true; } - port = findPort(pkt->getAddr(), pkt->getSrc()); } else { //Snoop didn't succeed DPRINTF(Bus, "Adding a retry to RETRY list %i\n", pktPort); @@ -364,14 +364,15 @@ Bus::functionalSnoop(PacketPtr pkt) } bool -Bus::timingSnoop(PacketPtr pkt) 
+Bus::timingSnoop(PacketPtr pkt, Port* responder) { std::vector ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); bool success = true; while (!ports.empty() && success) { - success = interfaces[ports.back()]->sendTiming(pkt); + if (interfaces[ports.back()] != responder) //Don't call if responder also, once will do + success = interfaces[ports.back()]->sendTiming(pkt); ports.pop_back(); } @@ -387,11 +388,14 @@ Bus::recvAtomic(PacketPtr pkt) DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); + pkt->flags |= SNOOP_COMMIT; // Assume one bus cycle in order to get through. This may have // some clock skew issues yet again... pkt->finishTime = curTick + clock; + Tick snoopTime = atomicSnoop(pkt); + if (snoopTime) return snoopTime; //Snoop satisfies it else @@ -406,6 +410,8 @@ Bus::recvFunctional(PacketPtr pkt) DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); + pkt->flags |= SNOOP_COMMIT; + functionalSnoop(pkt); // If the snooping found what we were looking for, we're done. diff --git a/src/mem/bus.hh b/src/mem/bus.hh index ff1d2545d..7169a8e6d 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -118,7 +118,7 @@ class Bus : public MemObject * the snoop to happen * @return True if succeds. */ - bool timingSnoop(PacketPtr pkt); + bool timingSnoop(PacketPtr pkt, Port *responder); /** Process address range request. 
* @param resp addresses that we can respond to -- cgit v1.2.3 From ac309071afe9d28e4635337c1c645a8cfc526a0f Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Tue, 14 Nov 2006 01:13:26 -0500 Subject: Update phase param in the .py file for the cpus --HG-- extra : convert_revision : cd2eb8c00adcb34b8693a4d1a66187927c0f6803 --- src/python/m5/objects/BaseCPU.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/python/m5/objects/BaseCPU.py b/src/python/m5/objects/BaseCPU.py index 2f702a4bf..8037c90af 100644 --- a/src/python/m5/objects/BaseCPU.py +++ b/src/python/m5/objects/BaseCPU.py @@ -47,6 +47,7 @@ class BaseCPU(SimObject): "defer registration with system (for sampling)") clock = Param.Clock(Parent.clock, "clock speed") + phase = Param.Latency("0ns", "clock phase") _mem_ports = [] -- cgit v1.2.3 From 8155e61a601a37fb210a7676ba500014a7b5d054 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Tue, 14 Nov 2006 01:38:42 -0500 Subject: Update atomic and functional paths for snoops as well --HG-- extra : convert_revision : 566d73438efb87ca683e4dee23454d880db3dfc7 --- src/mem/bus.cc | 31 +++++++++++++++++-------------- src/mem/bus.hh | 4 ++-- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index a9f95fdc7..92722fd97 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -331,17 +331,19 @@ Bus::findSnoopPorts(Addr addr, int id) } Tick -Bus::atomicSnoop(PacketPtr pkt) +Bus::atomicSnoop(PacketPtr pkt, Port *responder) { std::vector ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); Tick response_time = 0; while (!ports.empty()) { - Tick response = interfaces[ports.back()]->sendAtomic(pkt); - if (response) { - assert(!response_time); //Multiple responders - response_time = response; + if (interfaces[ports.back()] != responder) { + Tick response = interfaces[ports.back()]->sendAtomic(pkt); + if (response) { + assert(!response_time); //Multiple responders + response_time = response; + } } ports.pop_back(); } @@ -349,7 +351,7 
@@ Bus::atomicSnoop(PacketPtr pkt) } void -Bus::functionalSnoop(PacketPtr pkt) +Bus::functionalSnoop(PacketPtr pkt, Port *responder) { std::vector ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); @@ -357,7 +359,8 @@ Bus::functionalSnoop(PacketPtr pkt) int id = pkt->getSrc(); while (!ports.empty() && pkt->result != Packet::Success) { - interfaces[ports.back()]->sendFunctional(pkt); + if (interfaces[ports.back()] != responder) + interfaces[ports.back()]->sendFunctional(pkt); ports.pop_back(); pkt->setSrc(id); } @@ -394,12 +397,13 @@ Bus::recvAtomic(PacketPtr pkt) // some clock skew issues yet again... pkt->finishTime = curTick + clock; - Tick snoopTime = atomicSnoop(pkt); + Port *port = findPort(pkt->getAddr(), pkt->getSrc()); + Tick snoopTime = atomicSnoop(pkt, port); if (snoopTime) return snoopTime; //Snoop satisfies it else - return findPort(pkt->getAddr(), pkt->getSrc())->sendAtomic(pkt); + return port->sendAtomic(pkt); } /** Function called by the port when the bus is receiving a Functional @@ -412,13 +416,12 @@ Bus::recvFunctional(PacketPtr pkt) assert(pkt->getDest() == Packet::Broadcast); pkt->flags |= SNOOP_COMMIT; - functionalSnoop(pkt); + Port* port = findPort(pkt->getAddr(), pkt->getSrc()); + functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); // If the snooping found what we were looking for, we're done. - if (pkt->result != Packet::Success) { - Port* port = findPort(pkt->getAddr(), pkt->getSrc()); - if (port) - port->sendFunctional(pkt); + if (pkt->result != Packet::Success && port) { + port->sendFunctional(pkt); } } diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 7169a8e6d..c472b6143 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -109,10 +109,10 @@ class Bus : public MemObject std::vector findSnoopPorts(Addr addr, int id); /** Snoop all relevant ports atomicly. */ - Tick atomicSnoop(PacketPtr pkt); + Tick atomicSnoop(PacketPtr pkt, Port* responder); /** Snoop all relevant ports functionally. 
*/ - void functionalSnoop(PacketPtr pkt); + void functionalSnoop(PacketPtr pkt, Port *responder); /** Call snoop on caches, be sure to set SNOOP_COMMIT bit if you want * the snoop to happen -- cgit v1.2.3 From 21dc65bc47d4e3572c4aa60b9327047687a77210 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Tue, 14 Nov 2006 10:09:13 -0500 Subject: If all the targets aren't satisfied, reinitialize the packet. --HG-- extra : convert_revision : 5b0a977a162a1b881b97a3185fb386cc76632a4a --- src/mem/cache/miss/miss_queue.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index fe467a8ea..3c4586272 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -612,6 +612,8 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time) if (mshr->hasTargets()) { // Didn't satisfy all the targets, need to resend Packet::Command cmd = mshr->getTarget()->cmd; + mshr->pkt->setDest(Packet::Broadcast); + mshr->pkt->result = Packet::Unknown; mq.markPending(mshr, cmd); mshr->order = order++; cache->setMasterRequest(Request_MSHR, time); -- cgit v1.2.3 From ee777f35c2989609037688e8966efae995f6b3e0 Mon Sep 17 00:00:00 2001 From: Lisa Hsu Date: Tue, 14 Nov 2006 12:59:57 -0500 Subject: interrupts.hh: make a likewise updateIntrInfo for Sparc that's blank so it doesn't fart on build src/arch/sparc/interrupts.hh: make a likewise updateIntrInfo for Sparc that's blank so it doesn't fart on build --HG-- extra : convert_revision : 5f469d0cf897479b42703104cd801a8ef923fcae --- src/arch/sparc/interrupts.hh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/arch/sparc/interrupts.hh b/src/arch/sparc/interrupts.hh index 0072f4184..70838d1ce 100644 --- a/src/arch/sparc/interrupts.hh +++ b/src/arch/sparc/interrupts.hh @@ -79,6 +79,11 @@ namespace SparcISA return NoFault; } + void updateIntrInfo(ThreadContext * tc) + { + + } + void serialize(std::ostream &os) { } -- cgit v1.2.3 From 
c32f3056f9e513f5efff5eec2210ba7c6dcbc67e Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Tue, 14 Nov 2006 17:15:05 -0500 Subject: Fix bugs around uni-coherence invalidates being propogated properly. src/mem/bus.cc: Make it so that invalidates being sent from the responder up don't call the responder but they should also not Panic. src/mem/packet.hh: If we don't have data in the packet, don't call deleteData: Example: InvalidateRequests never have data. --HG-- extra : convert_revision : 18766bc9f3bb4d852ac651d094254d347abd1634 --- src/mem/bus.cc | 42 +++++++++++++++++++++++++----------------- src/mem/packet.hh | 2 +- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 92722fd97..6b5b63f50 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -161,11 +161,11 @@ Bus::recvTiming(PacketPtr pkt) short dest = pkt->getDest(); if (dest == Packet::Broadcast) { port = findPort(pkt->getAddr(), pkt->getSrc()); - if (timingSnoop(pkt, port)) { + if (timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()])) { bool success; pkt->flags |= SNOOP_COMMIT; - success = timingSnoop(pkt, port); + success = timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); assert(success); if (pkt->flags & SATISFIED) { @@ -192,22 +192,28 @@ Bus::recvTiming(PacketPtr pkt) occupyBus(pkt); - if (port->sendTiming(pkt)) { - // Packet was successfully sent. Return true. - // Also take care of retries - if (inRetry) { - DPRINTF(Bus, "Remove retry from list %i\n", retryList.front()); - retryList.front()->onRetryList(false); - retryList.pop_front(); - inRetry = false; + if (port) { + if (port->sendTiming(pkt)) { + // Packet was successfully sent. Return true. + // Also take care of retries + if (inRetry) { + DPRINTF(Bus, "Remove retry from list %i\n", retryList.front()); + retryList.front()->onRetryList(false); + retryList.pop_front(); + inRetry = false; + } + return true; } + + // Packet not successfully sent. Leave or put it on the retry list. 
+ DPRINTF(Bus, "Adding a retry to RETRY list %i\n", pktPort); + addToRetryList(pktPort); + return false; + } + else { + //Forwarding up from responder, just return true; return true; } - - // Packet not successfully sent. Leave or put it on the retry list. - DPRINTF(Bus, "Adding a retry to RETRY list %i\n", pktPort); - addToRetryList(pktPort); - return false; } void @@ -398,12 +404,14 @@ Bus::recvAtomic(PacketPtr pkt) pkt->finishTime = curTick + clock; Port *port = findPort(pkt->getAddr(), pkt->getSrc()); - Tick snoopTime = atomicSnoop(pkt, port); + Tick snoopTime = atomicSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); if (snoopTime) return snoopTime; //Snoop satisfies it - else + else if (port) return port->sendAtomic(pkt); + else + return 0; } /** Function called by the port when the bus is receiving a Functional diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 2bc51bf12..19251941f 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -301,7 +301,7 @@ class Packet /** Destructor. */ ~Packet() - { deleteData(); } + { if (staticData || dynamicData) deleteData(); } /** Reinitialize packet address and size from the associated * Request object, and reset other fields that may have been -- cgit v1.2.3 From 069c7c30d1c871aba937a6a4ee7b1146d716ac4b Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 14 Nov 2006 17:22:32 -0500 Subject: Various fixes to delete packet and request a little better. src/cpu/simple/timing.cc: Various updates for deleting requests more properly. The major change is moving the deletion of the fetch request/packet to after the instruction has executed and completed. This should fix a few bugs because Ron's memory system didn't expect a call for a functional access while a timing access was being processed. 
--HG-- extra : convert_revision : c7cf114bb1ff3cdaa7b0a40ed4c5302dc9d3a522 --- src/cpu/simple/timing.cc | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index db2c940c0..d75688ee6 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -281,6 +281,8 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) // memory system takes ownership of packet dcache_pkt = NULL; } + } else { + delete req; } // This will need a new way to tell if it has a dcache attached. @@ -366,6 +368,8 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) dcache_pkt = NULL; } } + } else { + delete req; } // This will need a new way to tell if it's hooked up to a cache or not. @@ -448,6 +452,8 @@ TimingSimpleCPU::fetch() ifetch_pkt = NULL; } } else { + delete ifetch_req; + delete ifetch_pkt; // fetch fault: advance directly to next instruction (fault handler) advanceInst(fault); } @@ -481,13 +487,13 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) _status = Running; - delete pkt->req; - delete pkt; - numCycles += curTick - previousTick; previousTick = curTick; if (getState() == SimObject::Draining) { + delete pkt->req; + delete pkt; + completeDrain(); return; } @@ -519,6 +525,9 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) postExecute(); advanceInst(fault); } + + delete pkt->req; + delete pkt; } void -- cgit v1.2.3 From dbdf2f14ae6b586efd31b73aa4548a38ecee263f Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Wed, 15 Nov 2006 18:22:15 -0500 Subject: Add L2 cache option to fs.py --l2cache --HG-- extra : convert_revision : 5bdd1129c3b23e91d441e7b83f6a824ef7740fab --- configs/common/Caches.py | 7 +++++++ configs/example/fs.py | 14 +++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/configs/common/Caches.py b/configs/common/Caches.py index d86fba246..4692ef537 100644 --- a/configs/common/Caches.py +++ b/configs/common/Caches.py @@ -37,3 
+37,10 @@ class L1Cache(BaseCache): tgts_per_mshr = 5 protocol = CoherenceProtocol(protocol='moesi') +class L2Cache(BaseCache): + assoc = 8 + block_size = 64 + latency = 10 + mshrs = 20 + tgts_per_mshr = 12 + diff --git a/configs/example/fs.py b/configs/example/fs.py index a9f1d579a..a70a60b97 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -47,6 +47,7 @@ config_root = os.path.dirname(config_path) parser = optparse.OptionParser() # Benchmark options +parser.add_option("--l2cache", action="store_true") parser.add_option("--dual", action="store_true", help="Simulate two systems attached with an ethernet link") parser.add_option("-b", "--benchmark", action="store", type="string", @@ -93,12 +94,23 @@ else: test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0]) np = options.num_cpus + +if options.l2cache: + test_sys.l2 = L2Cache(size = '2MB') + test_sys.tol2bus = Bus() + test_sys.l2.cpu_side = test_sys.tol2bus.port + test_sys.l2.mem_side = test_sys.membus.port + test_sys.cpu = [TestCPUClass(cpu_id=i) for i in xrange(np)] for i in xrange(np): if options.caches: test_sys.cpu[i].addPrivateSplitL1Caches(L1Cache(size = '32kB'), L1Cache(size = '64kB')) - test_sys.cpu[i].connectMemPorts(test_sys.membus) + + if options.l2cache: + test_sys.cpu[i].connectMemPorts(test_sys.tol2bus) + else: + test_sys.cpu[i].connectMemPorts(test_sys.membus) if len(bm) == 2: drive_sys = makeLinuxAlphaSystem(drive_mem_mode, bm[1]) -- cgit v1.2.3