diff options
-rw-r--r-- | configs/boot/mutex-test.rcS | 6 | ||||
-rw-r--r-- | configs/common/Benchmarks.py | 2 | ||||
-rw-r--r-- | configs/splash2/run.py | 283 | ||||
-rw-r--r-- | src/arch/alpha/faults.cc | 4 | ||||
-rw-r--r-- | src/base/traceflags.py | 1 | ||||
-rw-r--r-- | src/cpu/quiesce_event.cc | 2 | ||||
-rw-r--r-- | src/dev/isa_fake.cc | 1 | ||||
-rw-r--r-- | src/kern/tru64/tru64.hh | 13 | ||||
-rw-r--r-- | src/mem/cache/cache_impl.hh | 32 | ||||
-rw-r--r-- | src/mem/cache/coherence/coherence_protocol.cc | 257 | ||||
-rw-r--r-- | src/mem/cache/coherence/coherence_protocol.hh | 20 | ||||
-rw-r--r-- | src/mem/packet.hh | 53 | ||||
-rw-r--r-- | src/sim/faults.cc | 2 | ||||
-rw-r--r-- | src/sim/pseudo_inst.cc | 20 | ||||
-rwxr-xr-x | util/tracediff | 62 |
15 files changed, 472 insertions, 286 deletions
diff --git a/configs/boot/mutex-test.rcS b/configs/boot/mutex-test.rcS new file mode 100644 index 000000000..acf875368 --- /dev/null +++ b/configs/boot/mutex-test.rcS @@ -0,0 +1,6 @@ +#!/bin/sh + +cd /benchmarks/tests +/sbin/m5 resetstats +./pthread_mutex_test 4 10000 +/sbin/m5 exit diff --git a/configs/common/Benchmarks.py b/configs/common/Benchmarks.py index 1f272517a..eda0e80f9 100644 --- a/configs/common/Benchmarks.py +++ b/configs/common/Benchmarks.py @@ -97,6 +97,8 @@ Benchmarks = { 'ValStreamScale': [SysConfig('micro_streamscale.rcS', '512MB')], 'ValStreamCopy': [SysConfig('micro_streamcopy.rcS', '512MB')], + 'MutexTest': [SysConfig('mutex-test.rcS', '128MB')], + 'bnAn': [SysConfig('/z/saidi/work/m5.newmem.head/configs/boot/bn-app.rcS', '128MB', '/z/saidi/work/bottleneck/bnimg.img')] } diff --git a/configs/splash2/run.py b/configs/splash2/run.py index ebbe14939..93b166d77 100644 --- a/configs/splash2/run.py +++ b/configs/splash2/run.py @@ -1,4 +1,4 @@ -# Copyright (c) 2005 The Regents of The University of Michigan +# Copyright (c) 2005-2006 The Regents of The University of Michigan # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -26,54 +26,243 @@ # # Authors: Ron Dreslinski -from m5 import * -import Splash2 - -if 'SYSTEM' not in env: - panic("The SYSTEM environment variable must be set!\ne.g -ESYSTEM=Detailed\n") - -if env['SYSTEM'] == 'Simple': - from SimpleConfig import * - BaseCPU.workload = Parent.workload - SimpleStandAlone.cpu = [ CPU() for i in xrange(int(env['NP'])) ] - root = SimpleStandAlone() -elif env['SYSTEM'] == 'Detailed': - from DetailedConfig import * - BaseCPU.workload = Parent.workload - DetailedStandAlone.cpu = [ DetailedCPU() for i in xrange(int(env['NP'])) ] - root = DetailedStandAlone() +# Splash2 Run Script +# + +import m5 +from m5.objects import * +import os, optparse, sys +m5.AddToPath('../common') + +# -------------------- +# Define Command Line Options +# ==================== + +parser = optparse.OptionParser() + +parser.add_option("-d", "--detailed", action="store_true") +parser.add_option("-t", "--timing", action="store_true") +parser.add_option("-m", "--maxtick", type="int") +parser.add_option("-n", "--numcpus", + help="Number of cpus in total", type="int") +parser.add_option("-f", "--frequency", + default = "1GHz", + help="Frequency of each CPU") +parser.add_option("-p", "--protocol", + default="moesi", + help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)") +parser.add_option("--l1size", + default = "32kB") +parser.add_option("--l1latency", + default = 1) +parser.add_option("--l2size", + default = "256kB") +parser.add_option("--l2latency", + default = 10) +parser.add_option("--rootdir", + help="ROot directory of Splash2", + default="/dist/splash2/codes") +parser.add_option("-b", "--benchmark", + help="Splash 2 benchmark to run") + +(options, args) = parser.parse_args() + +if args: + print "Error: script doesn't take any positional arguments" + sys.exit(1) + +if not options.numcpus: + print "Specify the number of cpus with -n" + sys.exit(1) + +# -------------------- +# Define Splash2 Benchmarks +# ==================== +class Cholesky(LiveProcess): + executable = options.rootdir + '/kernels/cholesky/CHOLESKY' + cmd = 'CHOLESKY -p' + str(options.numcpus) + ' '\ + + options.rootdir + '/kernels/cholesky/inputs/tk23.O' + +class FFT(LiveProcess): + executable = options.rootdir + '/kernels/fft/FFT' + cmd = 'FFT -p' + str(options.numcpus) + ' -m18' + +class LU_contig(LiveProcess): + executable = options.rootdir + '/kernels/lu/contiguous_blocks/LU' + cmd = 'LU -p' + str(options.numcpus) + +class LU_noncontig(LiveProcess): + executable = options.rootdir + '/kernels/lu/non_contiguous_blocks/LU' + cmd = 'LU -p' + str(options.numcpus) + +class Radix(LiveProcess): + executable = options.rootdir + '/kernels/radix/RADIX' + cmd = 'RADIX -n524288 -p' + str(options.numcpus) + +class Barnes(LiveProcess): + executable = options.rootdir + '/apps/barnes/BARNES' + cmd = 'BARNES' + input = options.rootdir + '/apps/barnes/input.p' + str(options.numcpus) + +class FMM(LiveProcess): + executable = options.rootdir + '/apps/fmm/FMM' + cmd = 'FMM' + input = options.rootdir + '/apps/fmm/inputs/input.2048.p' + str(options.numcpus) + +class Ocean_contig(LiveProcess): + executable = options.rootdir + '/apps/ocean/contiguous_partitions/OCEAN' + cmd = 'OCEAN -p' + str(options.numcpus) + +class Ocean_noncontig(LiveProcess): + executable = options.rootdir + '/apps/ocean/non_contiguous_partitions/OCEAN' + cmd = 'OCEAN -p' + str(options.numcpus) + +class Raytrace(LiveProcess): + executable = options.rootdir + '/apps/raytrace/RAYTRACE' + cmd = 'RAYTRACE -p' + str(options.numcpus) + ' ' \ + + options.rootdir + 'apps/raytrace/inputs/teapot.env' + +class Water_nsquared(LiveProcess): + executable = options.rootdir + '/apps/water-nsquared/WATER-NSQUARED' + cmd = 'WATER-NSQUARED' + input = options.rootdir + '/apps/water-nsquared/input.p' + str(options.numcpus) + +class Water_spatial(LiveProcess): + executable = options.rootdir + '/apps/water-spatial/WATER-SPATIAL' + cmd = 'WATER-SPATIAL' + input = options.rootdir + '/apps/water-spatial/input.p' + str(options.numcpus) + + +# -------------------- +# Base L1 Cache Definition +# ==================== + +class L1(BaseCache): + latency = options.l1latency + block_size = 64 + mshrs = 12 + tgts_per_mshr = 8 + protocol = CoherenceProtocol(protocol=options.protocol) + +# ---------------------- +# Base L2 Cache Definition +# ---------------------- + +class L2(BaseCache): + block_size = 64 + latency = options.l2latency + mshrs = 92 + tgts_per_mshr = 16 + write_buffers = 8 + +# ---------------------- +# Define the cpus +# ---------------------- + +busFrequency = Frequency(options.frequency) + +if options.timing: + cpus = [TimingSimpleCPU(cpu_id = i, + clock=options.frequency) + for i in xrange(options.numcpus)] +elif options.detailed: + cpus = [DerivO3CPU(cpu_id = i, + clock=options.frequency) + for i in xrange(options.numcpus)] else: - panic("The SYSTEM environment variable was set to something improper.\n Use Simple or Detailed\n") - -if 'BENCHMARK' not in env: - panic("The BENCHMARK environment variable must be set!\ne.g. -EBENCHMARK=Cholesky\n") - -if env['BENCHMARK'] == 'Cholesky': - root.workload = Splash2.Cholesky() -elif env['BENCHMARK'] == 'FFT': - root.workload = Splash2.FFT() -elif env['BENCHMARK'] == 'LUContig': - root.workload = Splash2.LU_contig() -elif env['BENCHMARK'] == 'LUNoncontig': - root.workload = Splash2.LU_noncontig() -elif env['BENCHMARK'] == 'Radix': - root.workload = Splash2.Radix() -elif env['BENCHMARK'] == 'Barnes': - root.workload = Splash2.Barnes() -elif env['BENCHMARK'] == 'FMM': - root.workload = Splash2.FMM() -elif env['BENCHMARK'] == 'OceanContig': - root.workload = Splash2.Ocean_contig() -elif env['BENCHMARK'] == 'OceanNoncontig': - root.workload = Splash2.Ocean_noncontig() -elif env['BENCHMARK'] == 'Raytrace': - root.workload = Splash2.Raytrace() -elif env['BENCHMARK'] == 'WaterNSquared': - root.workload = Splash2.Water_nsquared() -elif env['BENCHMARK'] == 'WaterSpatial': - root.workload = Splash2.Water_spatial() + cpus = [AtomicSimpleCPU(cpu_id = i, + clock=options.frequency) + for i in xrange(options.numcpus)] + +# ---------------------- +# Create a system, and add system wide objects +# ---------------------- +system = System(cpu = cpus, physmem = PhysicalMemory(), + membus = Bus(clock = busFrequency)) + +system.toL2bus = Bus(clock = busFrequency) +system.l2 = L2(size = options.l2size, assoc = 8) + +# ---------------------- +# Connect the L2 cache and memory together +# ---------------------- + +system.physmem.port = system.membus.port +system.l2.cpu_side = system.toL2bus.port +system.l2.mem_side = system.membus.port + +# ---------------------- +# Connect the L2 cache and clusters together +# ---------------------- +for cpu in cpus: + cpu.addPrivateSplitL1Caches(L1(size = options.l1size, assoc = 1), + L1(size = options.l1size, assoc = 4)) + cpu.mem = cpu.dcache + # connect cpu level-1 caches to shared level-2 cache + cpu.connectMemPorts(system.toL2bus) + + +# ---------------------- +# Define the root +# ---------------------- + +root = Root(system = system) + +# -------------------- +# Pick the correct Splash2 Benchmarks +# ==================== +if options.benchmark == 'Cholesky': + root.workload = Cholesky() +elif options.benchmark == 'FFT': + root.workload = FFT() +elif options.benchmark == 'LUContig': + root.workload = LU_contig() +elif options.benchmark == 'LUNoncontig': + root.workload = LU_noncontig() +elif options.benchmark == 'Radix': + root.workload = Radix() +elif options.benchmark == 'Barnes': + root.workload = Barnes() +elif options.benchmark == 'FMM': + root.workload = FMM() +elif options.benchmark == 'OceanContig': + root.workload = Ocean_contig() +elif options.benchmark == 'OceanNoncontig': + root.workload = Ocean_noncontig() +elif options.benchmark == 'Raytrace': + root.workload = Raytrace() +elif options.benchmark == 'WaterNSquared': + root.workload = Water_nsquared() +elif options.benchmark == 'WaterSpatial': + root.workload = Water_spatial() else: - panic("The BENCHMARK environment variable was set to something" \ + panic("The --benchmark environment variable was set to something" \ +" improper.\nUse Cholesky, FFT, LUContig, LUNoncontig, Radix" \ +", Barnes, FMM, OceanContig,\nOceanNoncontig, Raytrace," \ +" WaterNSquared, or WaterSpatial\n") + +# -------------------- +# Assign the workload to the cpus +# ==================== + +for cpu in cpus: + cpu.workload = root.workload + +# ---------------------- +# Run the simulation +# ---------------------- + +if options.timing or options.detailed: + root.system.mem_mode = 'timing' + +# instantiate configuration +m5.instantiate(root) + +# simulate until program terminates +if options.maxtick: + exit_event = m5.simulate(options.maxtick) +else: + exit_event = m5.simulate() + +print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause() + diff --git a/src/arch/alpha/faults.cc b/src/arch/alpha/faults.cc index eef4361fd..7179bf025 100644 --- a/src/arch/alpha/faults.cc +++ b/src/arch/alpha/faults.cc @@ -194,7 +194,8 @@ void PageTableFault::invoke(ThreadContext *tc) // We've accessed the next page if (vaddr > p->stack_min - PageBytes) { - warn("Increasing stack %#x:%#x to %#x:%#x because of access to %#x", + DPRINTF(Stack, + "Increasing stack %#x:%#x to %#x:%#x because of access to %#x", p->stack_min, p->stack_base, p->stack_min - PageBytes, p->stack_base, vaddr); p->stack_min -= PageBytes; @@ -202,6 +203,7 @@ void PageTableFault::invoke(ThreadContext *tc) fatal("Over max stack size for one thread\n"); p->pTable->allocate(p->stack_min, PageBytes); } else { + warn("Page fault on address %#x\n", vaddr); FaultBase::invoke(tc); } } diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 298d22c2b..2402cf361 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -133,6 +133,7 @@ baseFlags = [ 'PciConfigAll', 'Pipeline', 'Printf', + 'Quiesce', 'ROB', 'Regs', 'Rename', diff --git a/src/cpu/quiesce_event.cc b/src/cpu/quiesce_event.cc index 8dd20db02..fa79e6d1e 100644 --- a/src/cpu/quiesce_event.cc +++ b/src/cpu/quiesce_event.cc @@ -28,6 +28,7 @@ * Authors: Kevin Lim */ +#include "cpu/base.hh" #include "cpu/thread_context.hh" #include "cpu/quiesce_event.hh" @@ -39,6 +40,7 @@ EndQuiesceEvent::EndQuiesceEvent(ThreadContext *_tc) void EndQuiesceEvent::process() { + DPRINTF(Quiesce, "activating %s\n", tc->getCpuPtr()->name()); tc->activate(); } diff --git a/src/dev/isa_fake.cc b/src/dev/isa_fake.cc index 4f1771ff9..23761cd10 100644 --- a/src/dev/isa_fake.cc +++ b/src/dev/isa_fake.cc @@ -61,6 +61,7 @@ IsaFake::read(PacketPtr pkt) DPRINTF(Tsunami, "read va=%#x size=%d\n", pkt->getAddr(), pkt->getSize()); switch (pkt->getSize()) { + case sizeof(uint64_t): pkt->set(0xFFFFFFFFFFFFFFFFULL); break; case sizeof(uint32_t): diff --git a/src/kern/tru64/tru64.hh b/src/kern/tru64/tru64.hh index 2ee4014db..6d6d0d96d 100644 --- a/src/kern/tru64/tru64.hh +++ b/src/kern/tru64/tru64.hh @@ -544,12 +544,19 @@ class Tru64 : public OperatingSystem process->next_thread_stack_base -= stack_size; } - stack_base = roundDown(stack_base, VMPageSize); + Addr rounded_stack_base = roundDown(stack_base, VMPageSize); + Addr rounded_stack_size = roundUp(stack_size, VMPageSize); + + DPRINTF(SyscallVerbose, + "stack_create: allocating stack @ %#x size %#x " + "(rounded from %#x, %#x)\n", + rounded_stack_base, rounded_stack_size, + stack_base, stack_size); // map memory - process->pTable->allocate(stack_base, roundUp(stack_size, VMPageSize)); + process->pTable->allocate(rounded_stack_base, rounded_stack_size); - argp->address = gtoh(stack_base); + argp->address = gtoh(rounded_stack_base); argp.copyOut(tc->getMemPort()); return 0; diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 66a9ee554..3c47762f6 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -231,8 +231,16 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt) exitSimLoop("A cache reached the maximum miss count"); } } - missQueue->handleMiss(pkt, size, curTick + hitLatency); -// return MA_CACHE_MISS; + + if (pkt->flags & SATISFIED) { + // happens when a store conditional fails because it missed + // the cache completely + if (pkt->needsResponse()) + respond(pkt, curTick+lat); + } else { + missQueue->handleMiss(pkt, size, curTick + hitLatency); + } + return true; } @@ -585,7 +593,7 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update, assert(pkt->result == Packet::Success); } return 0; - } else if (!blk) { + } else if (!blk && !(pkt->flags & SATISFIED)) { // update the cache state and statistics if (mshr || !writes.empty()){ // Can't handle it, return pktuest unsatisfied. @@ -653,18 +661,20 @@ return 0; return memSidePort->sendAtomic(pkt); } } else { - // There was a cache hit. - // Handle writebacks if needed - while (!writebacks.empty()){ - memSidePort->sendAtomic(writebacks.front()); - writebacks.pop_front(); - } + if (blk) { + // There was a cache hit. + // Handle writebacks if needed + while (!writebacks.empty()){ + memSidePort->sendAtomic(writebacks.front()); + writebacks.pop_front(); + } - hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + } return hitLatency; } - fatal("Probe not handled.\n"); + return 0; } diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index 52beb0880..3d7721805 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -206,8 +206,7 @@ bool CoherenceProtocol::supplyTrans(BaseCache *cache, PacketPtr &pkt, CacheBlk *blk, MSHR *mshr, - CacheBlk::State & new_state - ) + CacheBlk::State & new_state) { return true; } @@ -263,182 +262,106 @@ CoherenceProtocol::CoherenceProtocol(const string &name, const bool doUpgrades) : SimObject(name) { - if ((protocol == "mosi" || protocol == "moesi") && !doUpgrades) { - cerr << "CoherenceProtocol: ownership protocols require upgrade transactions" - << "(write miss on owned block generates ReadExcl, which will clobber dirty block)" - << endl; - fatal(""); + // Python should catch this, but in case it doesn't... + if (!(protocol == "msi" || protocol == "mesi" || + protocol == "mosi" || protocol == "moesi")) { + fatal("CoherenceProtocol: unrecognized protocol %s\n", protocol); } - Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq; - Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeReq : Packet::ReadExResp; - -//@todo add in hardware prefetch to this list - if (protocol == "msi") { - // incoming requests: specify outgoing bus request - transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); - transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); - //Prefetching causes a read - transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); - - // on response to given request: specify new state - transitionTable[Invalid][Packet::ReadResp].onResponse(Shared); - transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); - transitionTable[Shared][writeToSharedResp].onResponse(Modified); - - // bus snoop transition functions - transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::ReadReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans); - //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - - if (doUpgrades) { - transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); - } + bool hasOwned = (protocol == "mosi" || protocol == "moesi"); + bool hasExclusive = (protocol == "mesi" || protocol == "moesi"); + + if (hasOwned && !doUpgrades) { + fatal("CoherenceProtocol: ownership protocols require upgrade " + "transactions\n(write miss on owned block generates ReadExcl, " + "which will clobber dirty block)\n"); } - else if(protocol == "mesi") { - // incoming requests: specify outgoing bus request - transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); - transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); - //Prefetching causes a read - transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); - - // on response to given request: specify new state - transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive); - //It will move into shared if the shared line is asserted in the - //getNewState function - transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); - transitionTable[Shared][writeToSharedResp].onResponse(Modified); - - // bus snoop transition functions - transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); - transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared); - transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans); - //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - - if (doUpgrades) { - transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); - } + // set up a few shortcuts to save typing & visual clutter + typedef Packet P; + StateTransition (&tt)[stateMax+1][NUM_MEM_CMDS] = transitionTable; + + P::Command writeToSharedCmd = doUpgrades ? P::UpgradeReq : P::ReadExReq; + P::Command writeToSharedResp = doUpgrades ? P::UpgradeReq : P::ReadExResp; + + // Note that all transitions by default cause a panic. + // Override the valid transitions with the appropriate actions here. + + // + // ----- incoming requests: specify outgoing bus request ----- + // + tt[Invalid][P::ReadReq].onRequest(P::ReadReq); + // we only support write allocate right now + tt[Invalid][P::WriteReq].onRequest(P::ReadExReq); + tt[Shared][P::WriteReq].onRequest(writeToSharedCmd); + if (hasOwned) { + tt[Owned][P::WriteReq].onRequest(writeToSharedCmd); } - else if(protocol == "mosi") { - // incoming requests: specify outgoing bus request - transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); - transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); - transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd); - //Prefetching causes a read - transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); - - // on response to given request: specify new state - transitionTable[Invalid][Packet::ReadResp].onResponse(Shared); - transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); - transitionTable[Shared][writeToSharedResp].onResponse(Modified); - transitionTable[Owned][writeToSharedResp].onResponse(Modified); - - // bus snoop transition functions - transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); - transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans); - //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + // Prefetching causes a read + tt[Invalid][P::SoftPFReq].onRequest(P::ReadReq); + tt[Invalid][P::HardPFReq].onRequest(P::ReadReq); + + // + // ----- on response to given request: specify new state ----- + // + tt[Invalid][P::ReadExResp].onResponse(Modified); + tt[Shared][writeToSharedResp].onResponse(Modified); + // Go to Exclusive state on read response if we have one (will + // move into shared if the shared line is asserted in the + // getNewState function) + // + // originally had this as: + // tt[Invalid][P::ReadResp].onResponse(hasExclusive ? Exclusive: Shared); + // ...but for some reason that caused a link error... + if (hasExclusive) { + tt[Invalid][P::ReadResp].onResponse(Exclusive); + } else { + tt[Invalid][P::ReadResp].onResponse(Shared); + } + if (hasOwned) { + tt[Owned][writeToSharedResp].onResponse(Modified); } - else if(protocol == "moesi") { - // incoming requests: specify outgoing bus request - transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); - transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); - transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd); - //Prefetching causes a read - transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); - transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); - - // on response to given request: specify new state - transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive); - //It will move into shared if the shared line is asserted in the - //getNewState function - transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); - transitionTable[Shared][writeToSharedResp].onResponse(Modified); - transitionTable[Owned][writeToSharedResp].onResponse(Modified); - - // bus snoop transition functions - transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); - transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); - transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); - transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared); - transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); - transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans); - //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans); - transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); - transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + // + // ----- bus snoop transition functions ----- + // + tt[Invalid][P::ReadReq].onSnoop(nullTransition); + tt[Invalid][P::ReadExReq].onSnoop(nullTransition); + tt[Invalid][P::InvalidateReq].onSnoop(invalidateTrans); + tt[Invalid][P::WriteInvalidateReq].onSnoop(invalidateTrans); + tt[Shared][P::ReadReq].onSnoop(hasExclusive + ? assertShared : nullTransition); + tt[Shared][P::ReadExReq].onSnoop(invalidateTrans); + tt[Shared][P::InvalidateReq].onSnoop(invalidateTrans); + tt[Shared][P::WriteInvalidateReq].onSnoop(invalidateTrans); + if (doUpgrades) { + tt[Invalid][P::UpgradeReq].onSnoop(nullTransition); + tt[Shared][P::UpgradeReq].onSnoop(invalidateTrans); + } + tt[Modified][P::ReadExReq].onSnoop(supplyAndInvalidateTrans); + tt[Modified][P::ReadReq].onSnoop(hasOwned + ? supplyAndGotoOwnedTrans + : supplyAndGotoSharedTrans); + tt[Modified][P::InvalidateReq].onSnoop(invalidateTrans); + tt[Modified][P::WriteInvalidateReq].onSnoop(invalidateTrans); + + if (hasExclusive) { + tt[Exclusive][P::ReadReq].onSnoop(assertShared); + tt[Exclusive][P::ReadExReq].onSnoop(invalidateTrans); + tt[Exclusive][P::InvalidateReq].onSnoop(invalidateTrans); + tt[Exclusive][P::WriteInvalidateReq].onSnoop(invalidateTrans); } - else { - cerr << "CoherenceProtocol: unrecognized protocol " << protocol - << endl; - fatal(""); + if (hasOwned) { + tt[Owned][P::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + tt[Owned][P::ReadExReq].onSnoop(supplyAndInvalidateTrans); + tt[Owned][P::UpgradeReq].onSnoop(invalidateTrans); + tt[Owned][P::InvalidateReq].onSnoop(invalidateTrans); + tt[Owned][P::WriteInvalidateReq].onSnoop(invalidateTrans); } + + // @todo add in hardware prefetch to this list } diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh index b30fb053b..481277523 100644 --- a/src/mem/cache/coherence/coherence_protocol.hh +++ b/src/mem/cache/coherence/coherence_protocol.hh @@ -211,31 +211,25 @@ class CoherenceProtocol : public SimObject friend class CoherenceProtocol::StateTransition; /** Mask to select status bits relevant to coherence protocol. */ - const static CacheBlk::State - stateMask = BlkValid | BlkWritable | BlkDirty; + static const int stateMask = BlkValid | BlkWritable | BlkDirty; /** The Modified (M) state. */ - const static CacheBlk::State - Modified = BlkValid | BlkWritable | BlkDirty; + static const int Modified = BlkValid | BlkWritable | BlkDirty; /** The Owned (O) state. */ - const static CacheBlk::State - Owned = BlkValid | BlkDirty; + static const int Owned = BlkValid | BlkDirty; /** The Exclusive (E) state. */ - const static CacheBlk::State - Exclusive = BlkValid | BlkWritable; + static const int Exclusive = BlkValid | BlkWritable; /** The Shared (S) state. */ - const static CacheBlk::State - Shared = BlkValid; + static const int Shared = BlkValid; /** The Invalid (I) state. */ - const static CacheBlk::State - Invalid = 0; + static const int Invalid = 0; /** * Maximum state encoding value (used to size transition lookup * table). Could be more than number of states, depends on * encoding of status bits. */ - const static int stateMax = stateMask; + static const int stateMax = stateMask; /** * The table of all possible transitions, organized by starting state and diff --git a/src/mem/packet.hh b/src/mem/packet.hh index d8ad49bdb..cb97dd036 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -51,16 +51,16 @@ typedef uint8_t* PacketDataPtr; typedef std::list<PacketPtr> PacketList; //Coherence Flags -#define NACKED_LINE 1 << 0 -#define SATISFIED 1 << 1 -#define SHARED_LINE 1 << 2 -#define CACHE_LINE_FILL 1 << 3 -#define COMPRESSED 1 << 4 -#define NO_ALLOCATE 1 << 5 -#define SNOOP_COMMIT 1 << 6 +#define NACKED_LINE (1 << 0) +#define SATISFIED (1 << 1) +#define SHARED_LINE (1 << 2) +#define CACHE_LINE_FILL (1 << 3) +#define COMPRESSED (1 << 4) +#define NO_ALLOCATE (1 << 5) +#define SNOOP_COMMIT (1 << 6) //for now. @todo fix later -#define NUM_MEM_CMDS 1 << 11 +#define NUM_MEM_CMDS (1 << 11) /** * A Packet is used to encapsulate a transfer between two objects in * the memory system (e.g., the L1 and L2 cache). (In contrast, a @@ -172,17 +172,17 @@ class Packet // as well. enum CommandAttribute { - IsRead = 1 << 0, - IsWrite = 1 << 1, - IsPrefetch = 1 << 2, - IsInvalidate = 1 << 3, - IsRequest = 1 << 4, - IsResponse = 1 << 5, - NeedsResponse = 1 << 6, + IsRead = 1 << 0, + IsWrite = 1 << 1, + IsPrefetch = 1 << 2, + IsInvalidate = 1 << 3, + IsRequest = 1 << 4, + IsResponse = 1 << 5, + NeedsResponse = 1 << 6, IsSWPrefetch = 1 << 7, IsHWPrefetch = 1 << 8, IsUpgrade = 1 << 9, - HasData = 1 << 10 + HasData = 1 << 10 }; public: @@ -190,27 +190,27 @@ class Packet enum Command { InvalidCmd = 0, - ReadReq = IsRead | IsRequest | NeedsResponse, + ReadReq = IsRead | IsRequest | NeedsResponse, WriteReq = IsWrite | IsRequest | NeedsResponse | HasData, - WriteReqNoAck = IsWrite | IsRequest | HasData, + WriteReqNoAck = IsWrite | IsRequest | HasData, ReadResp = IsRead | IsResponse | NeedsResponse | HasData, - WriteResp = IsWrite | IsResponse | NeedsResponse, + WriteResp = IsWrite | IsResponse | NeedsResponse, Writeback = IsWrite | IsRequest | HasData, SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, SoftPFResp = IsRead | IsResponse | IsSWPrefetch - | NeedsResponse | HasData, + | NeedsResponse | HasData, HardPFResp = IsRead | IsResponse | IsHWPrefetch - | NeedsResponse | HasData, + | NeedsResponse | HasData, InvalidateReq = IsInvalidate | IsRequest, - WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest - | HasData | NeedsResponse, - WriteInvalidateResp = IsWrite | IsInvalidate | IsRequest | NeedsResponse - | IsResponse, + WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest + | HasData | NeedsResponse, + WriteInvalidateResp = IsWrite | IsInvalidate | IsRequest + | NeedsResponse | IsResponse, UpgradeReq = IsInvalidate | IsRequest | IsUpgrade, ReadExReq = IsRead | IsInvalidate | IsRequest | NeedsResponse, ReadExResp = IsRead | IsInvalidate | IsResponse - | NeedsResponse | HasData + | NeedsResponse | HasData }; /** Return the string name of the cmd field (for debugging and @@ -310,6 +310,7 @@ class Packet * multiple transactions. */ void reinitFromRequest() { assert(req->validPaddr); + flags = 0; addr = req->paddr; size = req->size; time = req->time; diff --git a/src/sim/faults.cc b/src/sim/faults.cc index 650b728f7..cea35482a 100644 --- a/src/sim/faults.cc +++ b/src/sim/faults.cc @@ -37,7 +37,7 @@ #if !FULL_SYSTEM void FaultBase::invoke(ThreadContext * tc) { - fatal("fault (%s) detected @ PC 0x%08p", name(), tc->readPC()); + fatal("fault (%s) detected @ PC %p", name(), tc->readPC()); } #else void FaultBase::invoke(ThreadContext * tc) diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index addf897c6..4eb0866a5 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -87,10 +87,15 @@ namespace AlphaPseudo EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent(); + Tick resume = curTick + Clock::Int::ns * ns; + if (quiesceEvent->scheduled()) - quiesceEvent->reschedule(curTick + Clock::Int::ns * ns); + quiesceEvent->reschedule(resume); else - quiesceEvent->schedule(curTick + Clock::Int::ns * ns); + quiesceEvent->schedule(resume); + + DPRINTF(Quiesce, "%s: quiesceNs(%d) until %d\n", + tc->getCpuPtr()->name(), ns, resume); tc->suspend(); if (tc->getKernelStats()) @@ -105,12 +110,15 @@ namespace AlphaPseudo EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent(); + Tick resume = curTick + tc->getCpuPtr()->cycles(cycles); + if (quiesceEvent->scheduled()) - quiesceEvent->reschedule(curTick + - tc->getCpuPtr()->cycles(cycles)); + quiesceEvent->reschedule(resume); else - quiesceEvent->schedule(curTick + - tc->getCpuPtr()->cycles(cycles)); + quiesceEvent->schedule(resume); + + DPRINTF(Quiesce, "%s: quiesceCycles(%d) until %d\n", + tc->getCpuPtr()->name(), cycles, resume); tc->suspend(); if (tc->getKernelStats()) diff --git a/util/tracediff b/util/tracediff index f2377a999..b25efe9b2 100755 --- a/util/tracediff +++ b/util/tracediff @@ -33,23 +33,63 @@ # ******Note that you need to enable some trace flags in the args in order # to do anything useful!****** # -# If you want to pass different arguments to the two instances of m5, -# you can embed them in the simulator arguments like this: +# Script arguments are handled uniformly as follows: +# - If the argument does not contain a '|' character, it is appended +# to both command lines. +# - If the argument has a '|' character in it, the text on either side +# of the '|' is appended to the respective command lines. Note that +# you'll have to quote the arg or escape the '|' with a backslash +# so that the shell doesn't think you're doing a pipe. # -# % tracediff "m5.opt --option1" "m5.opt --option2" [common args] +# In other words, the arguments should look like the command line you +# want to run, with "|" used to list the alternatives for the parts +# that you want to differ between the two runs. +# +# For example: +# +# % tracediff m5.opt --opt1 "--opt2|--opt3" --opt4 +# would compare these two runs: +# m5.opt --opt1 --opt2 --opt4 +# m5.opt --opt1 --opt3 --opt4 +# +# If you want to compare two different simulator binaries, put a '|' +# in the first script argument ("path1/m5.opt|path2/m5.opt"). If you +# want to add arguments to one run only, just put a '|' in with text +# only on one side ("--onlyOn1|"). You can do this with multiple +# arguments together too ("|-a -b -c" adds three args to the second +# run only). # if (@ARGV < 2) { - die "Usage: tracediff sim1 sim2 [--root.trace.flags=X args...]\n"; + die "Usage: tracediff \"sim1|sim2\" [common-arg \"arg1|arg2\" ...]\n"; +} + +foreach $arg (@ARGV) { + @pair = split('\|', $arg, -1); # -1 enables null trailing fields + if ($#pair > 0) { + push @cmd1, $pair[0]; + push @cmd2, $pair[1]; + } else { + push @cmd1, $arg; + push @cmd2, $arg; + } } # First two args are the two simulator binaries to compare -$sim1 = shift; -$sim2 = shift; +$sim1 = shift @cmd1; +$sim2 = shift @cmd2; + +# Everything else is a simulator arg. +$args1 = join(' ', @cmd1); +$args2 = join(' ', @cmd2); -# Everything else on the command line is taken to be an m5 argument to -# be given to both invocations -$simargs = '"' . join('" "', @ARGV) . '"'; +# Common mistake: if you don't set any traceflags this often isn't +# doing what you want. +if ($args1 !~ /--trace-flags/) { + print "****\n"; + print "**** WARNING: no trace flags set... you may not be diffing much!\n"; + print "****\n"; +} # Run individual invocations in separate dirs so output and intermediate # files (particularly config.py and config.ini) don't conflict. @@ -58,8 +98,8 @@ $dir2 = "tracediff-$$-2"; mkdir($dir1) or die "Can't create dir $dir1\n"; mkdir($dir2) or die "Can't create dir $dir2\n"; -$cmd1 = "$sim1 -d $dir1 $simargs 2>&1 |"; -$cmd2 = "$sim2 -d $dir2 $simargs 2>&1 |"; +$cmd1 = "$sim1 -d $dir1 $args1 2>&1 |"; +$cmd2 = "$sim2 -d $dir2 $args2 2>&1 |"; # This only works if you have rundiff in your path. I just edit it # with an explicit path if necessary. |